rf_netbsdkintf.c revision 1.231 1 /* $NetBSD: rf_netbsdkintf.c,v 1.231 2007/10/05 01:40:04 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.231 2007/10/05 01:40:04 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <dev/raidframe/raidframevar.h>
174 #include <dev/raidframe/raidframeio.h>
175 #include "raid.h"
176 #include "opt_raid_autoconfig.h"
177 #include "rf_raid.h"
178 #include "rf_copyback.h"
179 #include "rf_dag.h"
180 #include "rf_dagflags.h"
181 #include "rf_desc.h"
182 #include "rf_diskqueue.h"
183 #include "rf_etimer.h"
184 #include "rf_general.h"
185 #include "rf_kintf.h"
186 #include "rf_options.h"
187 #include "rf_driver.h"
188 #include "rf_parityscan.h"
189 #include "rf_threadstuff.h"
190
/*
 * db1_printf((fmt, ...)) -- level-1 debug output.
 *
 * The argument is a complete, parenthesized printf() argument list, e.g.
 *	db1_printf(("value=%d\n", v));
 * With DEBUG defined the message is printed when rf_kdebug_level > 0;
 * without DEBUG the macro expands to nothing.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves as
 * exactly one statement: it cannot capture a following "else", and
 * "if (x) db1_printf((...)); else ..." compiles in both configurations.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
197
198 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
199
200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
201
202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
203 * spare table */
204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
205 * installation process */
206
207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
208
209 /* prototypes */
210 static void KernelWakeupFunc(struct buf *);
211 static void InitBP(struct buf *, struct vnode *, unsigned,
212 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
213 void *, int, struct proc *);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217 static int raid_match(struct device *, struct cfdata *, void *);
218 static void raid_attach(struct device *, struct device *, void *);
219 static int raid_detach(struct device *, int);
220
221 dev_type_open(raidopen);
222 dev_type_close(raidclose);
223 dev_type_read(raidread);
224 dev_type_write(raidwrite);
225 dev_type_ioctl(raidioctl);
226 dev_type_strategy(raidstrategy);
227 dev_type_dump(raiddump);
228 dev_type_size(raidsize);
229
230 const struct bdevsw raid_bdevsw = {
231 raidopen, raidclose, raidstrategy, raidioctl,
232 raiddump, raidsize, D_DISK
233 };
234
235 const struct cdevsw raid_cdevsw = {
236 raidopen, raidclose, raidread, raidwrite, raidioctl,
237 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
238 };
239
240 /* XXX Not sure if the following should be replacing the raidPtrs above,
241 or if it should be used in conjunction with that...
242 */
243
/*
 * Per-unit driver state for one raid pseudo-disk.  raidattach() allocates
 * a zero-filled array of these (one entry per unit); the entry for a unit
 * is found by indexing that array with raidunit(dev).
 */
244 struct raid_softc {
245 struct device *sc_dev; /* autoconf device; becomes booted_device when this unit is chosen as root */
246 int sc_flags; /* RAIDF_* state bits, see below */
247 int sc_cflags; /* configuration flags */
248 uint64_t sc_size; /* size of the raid device; raiddump() compares it against a DEV_BSIZE block count -- units presumably DEV_BSIZE blocks, TODO confirm */
249 char sc_xname[20]; /* XXX external name */
250 struct disk sc_dkdev; /* generic disk device info (label, open masks, wedge count) */
251 struct bufq_state *buf_queue; /* queue of pending bufs; raidstrategy() puts requests here */
252 };
253 /* bits for raid_softc.sc_flags */
254 #define RAIDF_INITED 0x01 /* unit has been initialized */
255 #define RAIDF_WLABEL 0x02 /* label area is writable */
256 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
257 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
258 #define RAIDF_LOCKED 0x80 /* unit is locked */
259
260 #define raidunit(x) DISKUNIT(x)
261 int numraid = 0;
262
263 extern struct cfdriver raid_cd;
264 CFATTACH_DECL(raid, sizeof(struct raid_softc),
265 raid_match, raid_attach, raid_detach, NULL);
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
276 * Even if it is used immediately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
287
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292 struct raid_softc *raid_softc;
293
294 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
295 struct disklabel *);
296 static void raidgetdisklabel(dev_t);
297 static void raidmakedisklabel(struct raid_softc *);
298
299 static int raidlock(struct raid_softc *);
300 static void raidunlock(struct raid_softc *);
301
302 static void rf_markalldirty(RF_Raid_t *);
303
304 void rf_ReconThread(struct rf_recon_req *);
305 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
306 void rf_CopybackThread(RF_Raid_t *raidPtr);
307 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
308 int rf_autoconfig(struct device *self);
309 void rf_buildroothack(RF_ConfigSet_t *);
310
311 RF_AutoConfig_t *rf_find_raid_components(void);
312 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
313 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
314 static int rf_reasonable_label(RF_ComponentLabel_t *);
315 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
316 int rf_set_autoconfig(RF_Raid_t *, int);
317 int rf_set_rootpartition(RF_Raid_t *, int);
318 void rf_release_all_vps(RF_ConfigSet_t *);
319 void rf_cleanup_config_set(RF_ConfigSet_t *);
320 int rf_have_enough_components(RF_ConfigSet_t *);
321 int rf_auto_config_set(RF_ConfigSet_t *, int *);
322
323 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
324 allow autoconfig to take place.
325 Note that this is overridden by having
326 RAID_AUTOCONFIG as an option in the
327 kernel config file. */
328
329 struct RF_Pools_s rf_pools;
330
331 void
332 raidattach(int num)
333 {
334 int raidID;
335 int i, rc;
336
337 #ifdef DEBUG
338 printf("raidattach: Asked for %d units\n", num);
339 #endif
340
341 if (num <= 0) {
342 #ifdef DIAGNOSTIC
343 panic("raidattach: count <= 0");
344 #endif
345 return;
346 }
347 /* This is where all the initialization stuff gets done. */
348
349 numraid = num;
350
351 /* Make some space for requested number of units... */
352
353 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
354 if (raidPtrs == NULL) {
355 panic("raidPtrs is NULL!!");
356 }
357
358 rf_mutex_init(&rf_sparet_wait_mutex);
359
360 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
361
362 for (i = 0; i < num; i++)
363 raidPtrs[i] = NULL;
364 rc = rf_BootRaidframe();
365 if (rc == 0)
366 printf("Kernelized RAIDframe activated\n");
367 else
368 panic("Serious error booting RAID!!");
369
370 /* put together some datastructures like the CCD device does.. This
371 * lets us lock the device and what-not when it gets opened. */
372
373 raid_softc = (struct raid_softc *)
374 malloc(num * sizeof(struct raid_softc),
375 M_RAIDFRAME, M_NOWAIT);
376 if (raid_softc == NULL) {
377 printf("WARNING: no memory for RAIDframe driver\n");
378 return;
379 }
380
381 memset(raid_softc, 0, num * sizeof(struct raid_softc));
382
383 for (raidID = 0; raidID < num; raidID++) {
384 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
385
386 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
387 (RF_Raid_t *));
388 if (raidPtrs[raidID] == NULL) {
389 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
390 numraid = raidID;
391 return;
392 }
393 }
394
395 if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
396 printf("config_cfattach_attach failed?\n");
397 }
398
399 #ifdef RAID_AUTOCONFIG
400 raidautoconfig = 1;
401 #endif
402
403 /*
404 * Register a finalizer which will be used to auto-config RAID
405 * sets once all real hardware devices have been found.
406 */
407 if (config_finalize_register(NULL, rf_autoconfig) != 0)
408 printf("WARNING: unable to register RAIDframe finalizer\n");
409 }
410
411 int
412 rf_autoconfig(struct device *self)
413 {
414 RF_AutoConfig_t *ac_list;
415 RF_ConfigSet_t *config_sets;
416 int i;
417
418 if (raidautoconfig == 0)
419 return (0);
420
421 /* XXX This code can only be run once. */
422 raidautoconfig = 0;
423
424 /* 1. locate all RAID components on the system */
425 #ifdef DEBUG
426 printf("Searching for RAID components...\n");
427 #endif
428 ac_list = rf_find_raid_components();
429
430 /* 2. Sort them into their respective sets. */
431 config_sets = rf_create_auto_sets(ac_list);
432
433 /*
434 * 3. Evaluate each set andconfigure the valid ones.
435 * This gets done in rf_buildroothack().
436 */
437 rf_buildroothack(config_sets);
438
439 for (i = 0; i < numraid; i++)
440 if (raidPtrs[i] != NULL && raidPtrs[i]->valid)
441 dkwedge_discover(&raid_softc[i].sc_dkdev);
442
443 return 1;
444 }
445
446 void
447 rf_buildroothack(RF_ConfigSet_t *config_sets)
448 {
449 RF_ConfigSet_t *cset;
450 RF_ConfigSet_t *next_cset;
451 int retcode;
452 int raidID;
453 int rootID;
454 int col;
455 int num_root;
456 char *devname;
457
458 rootID = 0;
459 num_root = 0;
460 cset = config_sets;
461 while(cset != NULL ) {
462 next_cset = cset->next;
463 if (rf_have_enough_components(cset) &&
464 cset->ac->clabel->autoconfigure==1) {
465 retcode = rf_auto_config_set(cset,&raidID);
466 if (!retcode) {
467 #ifdef DEBUG
468 printf("raid%d: configured ok\n", raidID);
469 #endif
470 if (cset->rootable) {
471 rootID = raidID;
472 num_root++;
473 }
474 } else {
475 /* The autoconfig didn't work :( */
476 #ifdef DEBUG
477 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
478 #endif
479 rf_release_all_vps(cset);
480 }
481 } else {
482 #ifdef DEBUG
483 printf("raid%d: not enough components\n", raidID);
484 #endif
485 /* we're not autoconfiguring this set...
486 release the associated resources */
487 rf_release_all_vps(cset);
488 }
489 /* cleanup */
490 rf_cleanup_config_set(cset);
491 cset = next_cset;
492 }
493
494 /* if the user has specified what the root device should be
495 then we don't touch booted_device or boothowto... */
496
497 if (rootspec != NULL)
498 return;
499
500 /* we found something bootable... */
501
502 if (num_root == 1) {
503 booted_device = raid_softc[rootID].sc_dev;
504 } else if (num_root > 1) {
505
506 /*
507 * Maybe the MD code can help. If it cannot, then
508 * setroot() will discover that we have no
509 * booted_device and will ask the user if nothing was
510 * hardwired in the kernel config file
511 */
512
513 if (booted_device == NULL)
514 cpu_rootconf();
515 if (booted_device == NULL)
516 return;
517
518 num_root = 0;
519 for (raidID = 0; raidID < numraid; raidID++) {
520 if (raidPtrs[raidID]->valid == 0)
521 continue;
522
523 if (raidPtrs[raidID]->root_partition == 0)
524 continue;
525
526 for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
527 devname = raidPtrs[raidID]->Disks[col].devname;
528 devname += sizeof("/dev/") - 1;
529 if (strncmp(devname, booted_device->dv_xname,
530 strlen(booted_device->dv_xname)) != 0)
531 continue;
532 #ifdef DEBUG
533 printf("raid%d includes boot device %s\n",
534 raidID, devname);
535 #endif
536 num_root++;
537 rootID = raidID;
538 }
539 }
540
541 if (num_root == 1) {
542 booted_device = raid_softc[rootID].sc_dev;
543 } else {
544 /* we can't guess.. require the user to answer... */
545 boothowto |= RB_ASKNAME;
546 }
547 }
548 }
549
550
551 int
552 raidsize(dev_t dev)
553 {
554 struct raid_softc *rs;
555 struct disklabel *lp;
556 int part, unit, omask, size;
557
558 unit = raidunit(dev);
559 if (unit >= numraid)
560 return (-1);
561 rs = &raid_softc[unit];
562
563 if ((rs->sc_flags & RAIDF_INITED) == 0)
564 return (-1);
565
566 part = DISKPART(dev);
567 omask = rs->sc_dkdev.dk_openmask & (1 << part);
568 lp = rs->sc_dkdev.dk_label;
569
570 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
571 return (-1);
572
573 if (lp->d_partitions[part].p_fstype != FS_SWAP)
574 size = -1;
575 else
576 size = lp->d_partitions[part].p_size *
577 (lp->d_secsize / DEV_BSIZE);
578
579 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
580 return (-1);
581
582 return (size);
583
584 }
585
586 int
587 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
588 {
589 int unit = raidunit(dev);
590 struct raid_softc *rs;
591 const struct bdevsw *bdev;
592 struct disklabel *lp;
593 RF_Raid_t *raidPtr;
594 daddr_t offset;
595 int part, c, sparecol, j, scol, dumpto;
596 int error = 0;
597
598 if (unit >= numraid)
599 return (ENXIO);
600
601 rs = &raid_softc[unit];
602 raidPtr = raidPtrs[unit];
603
604 if ((rs->sc_flags & RAIDF_INITED) == 0)
605 return ENXIO;
606
607 /* we only support dumping to RAID 1 sets */
608 if (raidPtr->Layout.numDataCol != 1 ||
609 raidPtr->Layout.numParityCol != 1)
610 return EINVAL;
611
612
613 if ((error = raidlock(rs)) != 0)
614 return error;
615
616 if (size % DEV_BSIZE != 0) {
617 error = EINVAL;
618 goto out;
619 }
620
621 if (blkno + size / DEV_BSIZE > rs->sc_size) {
622 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
623 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
624 size / DEV_BSIZE, rs->sc_size);
625 error = EINVAL;
626 goto out;
627 }
628
629 part = DISKPART(dev);
630 lp = rs->sc_dkdev.dk_label;
631 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
632
633 /* figure out what device is alive.. */
634
635 /*
636 Look for a component to dump to. The preference for the
637 component to dump to is as follows:
638 1) the master
639 2) a used_spare of the master
640 3) the slave
641 4) a used_spare of the slave
642 */
643
644 dumpto = -1;
645 for (c = 0; c < raidPtr->numCol; c++) {
646 if (raidPtr->Disks[c].status == rf_ds_optimal) {
647 /* this might be the one */
648 dumpto = c;
649 break;
650 }
651 }
652
653 /*
654 At this point we have possibly selected a live master or a
655 live slave. We now check to see if there is a spared
656 master (or a spared slave), if we didn't find a live master
657 or a live slave.
658 */
659
660 for (c = 0; c < raidPtr->numSpare; c++) {
661 sparecol = raidPtr->numCol + c;
662 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
663 /* How about this one? */
664 scol = -1;
665 for(j=0;j<raidPtr->numCol;j++) {
666 if (raidPtr->Disks[j].spareCol == sparecol) {
667 scol = j;
668 break;
669 }
670 }
671 if (scol == 0) {
672 /*
673 We must have found a spared master!
674 We'll take that over anything else
675 found so far. (We couldn't have
676 found a real master before, since
677 this is a used spare, and it's
678 saying that it's replacing the
679 master.) On reboot (with
680 autoconfiguration turned on)
681 sparecol will become the 1st
682 component (component0) of this set.
683 */
684 dumpto = sparecol;
685 break;
686 } else if (scol != -1) {
687 /*
688 Must be a spared slave. We'll dump
689 to that if we havn't found anything
690 else so far.
691 */
692 if (dumpto == -1)
693 dumpto = sparecol;
694 }
695 }
696 }
697
698 if (dumpto == -1) {
699 /* we couldn't find any live components to dump to!?!?
700 */
701 error = EINVAL;
702 goto out;
703 }
704
705 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
706
707 /*
708 Note that blkno is relative to this particular partition.
709 By adding the offset of this partition in the RAID
710 set, and also adding RF_PROTECTED_SECTORS, we get a
711 value that is relative to the partition used for the
712 underlying component.
713 */
714
715 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
716 blkno + offset, va, size);
717
718 out:
719 raidunlock(rs);
720
721 return error;
722 }
723 /* ARGSUSED */
724 int
725 raidopen(dev_t dev, int flags, int fmt,
726 struct lwp *l)
727 {
728 int unit = raidunit(dev);
729 struct raid_softc *rs;
730 struct disklabel *lp;
731 int part, pmask;
732 int error = 0;
733
734 if (unit >= numraid)
735 return (ENXIO);
736 rs = &raid_softc[unit];
737
738 if ((error = raidlock(rs)) != 0)
739 return (error);
740 lp = rs->sc_dkdev.dk_label;
741
742 part = DISKPART(dev);
743
744 /*
745 * If there are wedges, and this is not RAW_PART, then we
746 * need to fail.
747 */
748 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
749 error = EBUSY;
750 goto bad;
751 }
752 pmask = (1 << part);
753
754 if ((rs->sc_flags & RAIDF_INITED) &&
755 (rs->sc_dkdev.dk_openmask == 0))
756 raidgetdisklabel(dev);
757
758 /* make sure that this partition exists */
759
760 if (part != RAW_PART) {
761 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
762 ((part >= lp->d_npartitions) ||
763 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
764 error = ENXIO;
765 goto bad;
766 }
767 }
768 /* Prevent this unit from being unconfigured while open. */
769 switch (fmt) {
770 case S_IFCHR:
771 rs->sc_dkdev.dk_copenmask |= pmask;
772 break;
773
774 case S_IFBLK:
775 rs->sc_dkdev.dk_bopenmask |= pmask;
776 break;
777 }
778
779 if ((rs->sc_dkdev.dk_openmask == 0) &&
780 ((rs->sc_flags & RAIDF_INITED) != 0)) {
781 /* First one... mark things as dirty... Note that we *MUST*
782 have done a configure before this. I DO NOT WANT TO BE
783 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
784 THAT THEY BELONG TOGETHER!!!!! */
785 /* XXX should check to see if we're only open for reading
786 here... If so, we needn't do this, but then need some
787 other way of keeping track of what's happened.. */
788
789 rf_markalldirty( raidPtrs[unit] );
790 }
791
792
793 rs->sc_dkdev.dk_openmask =
794 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
795
796 bad:
797 raidunlock(rs);
798
799 return (error);
800
801
802 }
803 /* ARGSUSED */
804 int
805 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
806 {
807 int unit = raidunit(dev);
808 struct cfdata *cf;
809 struct raid_softc *rs;
810 int error = 0;
811 int part;
812
813 if (unit >= numraid)
814 return (ENXIO);
815 rs = &raid_softc[unit];
816
817 if ((error = raidlock(rs)) != 0)
818 return (error);
819
820 part = DISKPART(dev);
821
822 /* ...that much closer to allowing unconfiguration... */
823 switch (fmt) {
824 case S_IFCHR:
825 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
826 break;
827
828 case S_IFBLK:
829 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
830 break;
831 }
832 rs->sc_dkdev.dk_openmask =
833 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
834
835 if ((rs->sc_dkdev.dk_openmask == 0) &&
836 ((rs->sc_flags & RAIDF_INITED) != 0)) {
837 /* Last one... device is not unconfigured yet.
838 Device shutdown has taken care of setting the
839 clean bits if RAIDF_INITED is not set
840 mark things as clean... */
841
842 rf_update_component_labels(raidPtrs[unit],
843 RF_FINAL_COMPONENT_UPDATE);
844 if (doing_shutdown) {
845 /* last one, and we're going down, so
846 lights out for this RAID set too. */
847 error = rf_Shutdown(raidPtrs[unit]);
848
849 /* It's no longer initialized... */
850 rs->sc_flags &= ~RAIDF_INITED;
851
852 /* detach the device */
853
854 cf = device_cfdata(rs->sc_dev);
855 error = config_detach(rs->sc_dev, DETACH_QUIET);
856 free(cf, M_RAIDFRAME);
857
858 /* Detach the disk. */
859 pseudo_disk_detach(&rs->sc_dkdev);
860 }
861 }
862
863 raidunlock(rs);
864 return (0);
865
866 }
867
868 void
869 raidstrategy(struct buf *bp)
870 {
871 int s;
872
873 unsigned int raidID = raidunit(bp->b_dev);
874 RF_Raid_t *raidPtr;
875 struct raid_softc *rs = &raid_softc[raidID];
876 int wlabel;
877
878 if ((rs->sc_flags & RAIDF_INITED) ==0) {
879 bp->b_error = ENXIO;
880 goto done;
881 }
882 if (raidID >= numraid || !raidPtrs[raidID]) {
883 bp->b_error = ENODEV;
884 goto done;
885 }
886 raidPtr = raidPtrs[raidID];
887 if (!raidPtr->valid) {
888 bp->b_error = ENODEV;
889 goto done;
890 }
891 if (bp->b_bcount == 0) {
892 db1_printf(("b_bcount is zero..\n"));
893 goto done;
894 }
895
896 /*
897 * Do bounds checking and adjust transfer. If there's an
898 * error, the bounds check will flag that for us.
899 */
900
901 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
902 if (DISKPART(bp->b_dev) == RAW_PART) {
903 uint64_t size; /* device size in DEV_BSIZE unit */
904
905 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
906 size = raidPtr->totalSectors <<
907 (raidPtr->logBytesPerSector - DEV_BSHIFT);
908 } else {
909 size = raidPtr->totalSectors >>
910 (DEV_BSHIFT - raidPtr->logBytesPerSector);
911 }
912 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
913 goto done;
914 }
915 } else {
916 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
917 db1_printf(("Bounds check failed!!:%d %d\n",
918 (int) bp->b_blkno, (int) wlabel));
919 goto done;
920 }
921 }
922 s = splbio();
923
924 bp->b_resid = 0;
925
926 /* stuff it onto our queue */
927 BUFQ_PUT(rs->buf_queue, bp);
928
929 /* scheduled the IO to happen at the next convenient time */
930 wakeup(&(raidPtrs[raidID]->iodone));
931
932 splx(s);
933 return;
934
935 done:
936 bp->b_resid = bp->b_bcount;
937 biodone(bp);
938 }
939 /* ARGSUSED */
940 int
941 raidread(dev_t dev, struct uio *uio, int flags)
942 {
943 int unit = raidunit(dev);
944 struct raid_softc *rs;
945
946 if (unit >= numraid)
947 return (ENXIO);
948 rs = &raid_softc[unit];
949
950 if ((rs->sc_flags & RAIDF_INITED) == 0)
951 return (ENXIO);
952
953 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
954
955 }
956 /* ARGSUSED */
957 int
958 raidwrite(dev_t dev, struct uio *uio, int flags)
959 {
960 int unit = raidunit(dev);
961 struct raid_softc *rs;
962
963 if (unit >= numraid)
964 return (ENXIO);
965 rs = &raid_softc[unit];
966
967 if ((rs->sc_flags & RAIDF_INITED) == 0)
968 return (ENXIO);
969
970 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
971
972 }
973
974 int
975 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
976 {
977 int unit = raidunit(dev);
978 int error = 0;
979 int part, pmask;
980 struct cfdata *cf;
981 struct raid_softc *rs;
982 RF_Config_t *k_cfg, *u_cfg;
983 RF_Raid_t *raidPtr;
984 RF_RaidDisk_t *diskPtr;
985 RF_AccTotals_t *totals;
986 RF_DeviceConfig_t *d_cfg, **ucfgp;
987 u_char *specific_buf;
988 int retcode = 0;
989 int column;
990 int raidid;
991 struct rf_recon_req *rrcopy, *rr;
992 RF_ComponentLabel_t *clabel;
993 RF_ComponentLabel_t *ci_label;
994 RF_ComponentLabel_t **clabel_ptr;
995 RF_SingleComponent_t *sparePtr,*componentPtr;
996 RF_SingleComponent_t component;
997 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
998 int i, j, d;
999 #ifdef __HAVE_OLD_DISKLABEL
1000 struct disklabel newlabel;
1001 #endif
1002 struct dkwedge_info *dkw;
1003
1004 if (unit >= numraid)
1005 return (ENXIO);
1006 rs = &raid_softc[unit];
1007 raidPtr = raidPtrs[unit];
1008
1009 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
1010 (int) DISKPART(dev), (int) unit, (int) cmd));
1011
1012 /* Must be open for writes for these commands... */
1013 switch (cmd) {
1014 #ifdef DIOCGSECTORSIZE
1015 case DIOCGSECTORSIZE:
1016 *(u_int *)data = raidPtr->bytesPerSector;
1017 return 0;
1018 case DIOCGMEDIASIZE:
1019 *(off_t *)data =
1020 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1021 return 0;
1022 #endif
1023 case DIOCSDINFO:
1024 case DIOCWDINFO:
1025 #ifdef __HAVE_OLD_DISKLABEL
1026 case ODIOCWDINFO:
1027 case ODIOCSDINFO:
1028 #endif
1029 case DIOCWLABEL:
1030 case DIOCAWEDGE:
1031 case DIOCDWEDGE:
1032 if ((flag & FWRITE) == 0)
1033 return (EBADF);
1034 }
1035
1036 /* Must be initialized for these... */
1037 switch (cmd) {
1038 case DIOCGDINFO:
1039 case DIOCSDINFO:
1040 case DIOCWDINFO:
1041 #ifdef __HAVE_OLD_DISKLABEL
1042 case ODIOCGDINFO:
1043 case ODIOCWDINFO:
1044 case ODIOCSDINFO:
1045 case ODIOCGDEFLABEL:
1046 #endif
1047 case DIOCGPART:
1048 case DIOCWLABEL:
1049 case DIOCGDEFLABEL:
1050 case DIOCAWEDGE:
1051 case DIOCDWEDGE:
1052 case DIOCLWEDGES:
1053 case RAIDFRAME_SHUTDOWN:
1054 case RAIDFRAME_REWRITEPARITY:
1055 case RAIDFRAME_GET_INFO:
1056 case RAIDFRAME_RESET_ACCTOTALS:
1057 case RAIDFRAME_GET_ACCTOTALS:
1058 case RAIDFRAME_KEEP_ACCTOTALS:
1059 case RAIDFRAME_GET_SIZE:
1060 case RAIDFRAME_FAIL_DISK:
1061 case RAIDFRAME_COPYBACK:
1062 case RAIDFRAME_CHECK_RECON_STATUS:
1063 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1064 case RAIDFRAME_GET_COMPONENT_LABEL:
1065 case RAIDFRAME_SET_COMPONENT_LABEL:
1066 case RAIDFRAME_ADD_HOT_SPARE:
1067 case RAIDFRAME_REMOVE_HOT_SPARE:
1068 case RAIDFRAME_INIT_LABELS:
1069 case RAIDFRAME_REBUILD_IN_PLACE:
1070 case RAIDFRAME_CHECK_PARITY:
1071 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1072 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1073 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1074 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1075 case RAIDFRAME_SET_AUTOCONFIG:
1076 case RAIDFRAME_SET_ROOT:
1077 case RAIDFRAME_DELETE_COMPONENT:
1078 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1079 if ((rs->sc_flags & RAIDF_INITED) == 0)
1080 return (ENXIO);
1081 }
1082
1083 switch (cmd) {
1084
1085 /* configure the system */
1086 case RAIDFRAME_CONFIGURE:
1087
1088 if (raidPtr->valid) {
1089 /* There is a valid RAID set running on this unit! */
1090 printf("raid%d: Device already configured!\n",unit);
1091 return(EINVAL);
1092 }
1093
1094 /* copy-in the configuration information */
1095 /* data points to a pointer to the configuration structure */
1096
1097 u_cfg = *((RF_Config_t **) data);
1098 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1099 if (k_cfg == NULL) {
1100 return (ENOMEM);
1101 }
1102 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1103 if (retcode) {
1104 RF_Free(k_cfg, sizeof(RF_Config_t));
1105 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1106 retcode));
1107 return (retcode);
1108 }
1109 /* allocate a buffer for the layout-specific data, and copy it
1110 * in */
1111 if (k_cfg->layoutSpecificSize) {
1112 if (k_cfg->layoutSpecificSize > 10000) {
1113 /* sanity check */
1114 RF_Free(k_cfg, sizeof(RF_Config_t));
1115 return (EINVAL);
1116 }
1117 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1118 (u_char *));
1119 if (specific_buf == NULL) {
1120 RF_Free(k_cfg, sizeof(RF_Config_t));
1121 return (ENOMEM);
1122 }
1123 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1124 k_cfg->layoutSpecificSize);
1125 if (retcode) {
1126 RF_Free(k_cfg, sizeof(RF_Config_t));
1127 RF_Free(specific_buf,
1128 k_cfg->layoutSpecificSize);
1129 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1130 retcode));
1131 return (retcode);
1132 }
1133 } else
1134 specific_buf = NULL;
1135 k_cfg->layoutSpecific = specific_buf;
1136
1137 /* should do some kind of sanity check on the configuration.
1138 * Store the sum of all the bytes in the last byte? */
1139
1140 /* configure the system */
1141
1142 /*
1143 * Clear the entire RAID descriptor, just to make sure
1144 * there is no stale data left in the case of a
1145 * reconfiguration
1146 */
1147 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1148 raidPtr->raidid = unit;
1149
1150 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1151
1152 if (retcode == 0) {
1153
1154 /* allow this many simultaneous IO's to
1155 this RAID device */
1156 raidPtr->openings = RAIDOUTSTANDING;
1157
1158 raidinit(raidPtr);
1159 rf_markalldirty(raidPtr);
1160 }
1161 /* free the buffers. No return code here. */
1162 if (k_cfg->layoutSpecificSize) {
1163 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1164 }
1165 RF_Free(k_cfg, sizeof(RF_Config_t));
1166
1167 return (retcode);
1168
1169 /* shutdown the system */
1170 case RAIDFRAME_SHUTDOWN:
1171
1172 if ((error = raidlock(rs)) != 0)
1173 return (error);
1174
1175 /*
1176 * If somebody has a partition mounted, we shouldn't
1177 * shutdown.
1178 */
1179
1180 part = DISKPART(dev);
1181 pmask = (1 << part);
1182 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1183 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1184 (rs->sc_dkdev.dk_copenmask & pmask))) {
1185 raidunlock(rs);
1186 return (EBUSY);
1187 }
1188
1189 retcode = rf_Shutdown(raidPtr);
1190
1191 /* It's no longer initialized... */
1192 rs->sc_flags &= ~RAIDF_INITED;
1193
1194 /* free the pseudo device attach bits */
1195
1196 cf = device_cfdata(rs->sc_dev);
1197 /* XXX this causes us to not return any errors
1198 from the above call to rf_Shutdown() */
1199 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1200 free(cf, M_RAIDFRAME);
1201
1202 /* Detach the disk. */
1203 pseudo_disk_detach(&rs->sc_dkdev);
1204
1205 raidunlock(rs);
1206
1207 return (retcode);
1208 case RAIDFRAME_GET_COMPONENT_LABEL:
1209 clabel_ptr = (RF_ComponentLabel_t **) data;
1210 /* need to read the component label for the disk indicated
1211 by row,column in clabel */
1212
1213 /* For practice, let's get it directly fromdisk, rather
1214 than from the in-core copy */
1215 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1216 (RF_ComponentLabel_t *));
1217 if (clabel == NULL)
1218 return (ENOMEM);
1219
1220 retcode = copyin( *clabel_ptr, clabel,
1221 sizeof(RF_ComponentLabel_t));
1222
1223 if (retcode) {
1224 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1225 return(retcode);
1226 }
1227
1228 clabel->row = 0; /* Don't allow looking at anything else.*/
1229
1230 column = clabel->column;
1231
1232 if ((column < 0) || (column >= raidPtr->numCol +
1233 raidPtr->numSpare)) {
1234 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1235 return(EINVAL);
1236 }
1237
1238 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1239 raidPtr->raid_cinfo[column].ci_vp,
1240 clabel );
1241
1242 if (retcode == 0) {
1243 retcode = copyout(clabel, *clabel_ptr,
1244 sizeof(RF_ComponentLabel_t));
1245 }
1246 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1247 return (retcode);
1248
1249 case RAIDFRAME_SET_COMPONENT_LABEL:
1250 clabel = (RF_ComponentLabel_t *) data;
1251
1252 /* XXX check the label for valid stuff... */
1253 /* Note that some things *should not* get modified --
1254 the user should be re-initing the labels instead of
1255 trying to patch things.
1256 */
1257
1258 raidid = raidPtr->raidid;
1259 #ifdef DEBUG
1260 printf("raid%d: Got component label:\n", raidid);
1261 printf("raid%d: Version: %d\n", raidid, clabel->version);
1262 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1263 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1264 printf("raid%d: Column: %d\n", raidid, clabel->column);
1265 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1266 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1267 printf("raid%d: Status: %d\n", raidid, clabel->status);
1268 #endif
1269 clabel->row = 0;
1270 column = clabel->column;
1271
1272 if ((column < 0) || (column >= raidPtr->numCol)) {
1273 return(EINVAL);
1274 }
1275
1276 /* XXX this isn't allowed to do anything for now :-) */
1277
1278 /* XXX and before it is, we need to fill in the rest
1279 of the fields!?!?!?! */
1280 #if 0
1281 raidwrite_component_label(
1282 raidPtr->Disks[column].dev,
1283 raidPtr->raid_cinfo[column].ci_vp,
1284 clabel );
1285 #endif
1286 return (0);
1287
1288 case RAIDFRAME_INIT_LABELS:
1289 clabel = (RF_ComponentLabel_t *) data;
1290 /*
1291 we only want the serial number from
1292 the above. We get all the rest of the information
1293 from the config that was used to create this RAID
1294 set.
1295 */
1296
1297 raidPtr->serial_number = clabel->serial_number;
1298
1299 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1300 (RF_ComponentLabel_t *));
1301 if (ci_label == NULL)
1302 return (ENOMEM);
1303
1304 raid_init_component_label(raidPtr, ci_label);
1305 ci_label->serial_number = clabel->serial_number;
1306 ci_label->row = 0; /* we dont' pretend to support more */
1307
1308 for(column=0;column<raidPtr->numCol;column++) {
1309 diskPtr = &raidPtr->Disks[column];
1310 if (!RF_DEAD_DISK(diskPtr->status)) {
1311 ci_label->partitionSize = diskPtr->partitionSize;
1312 ci_label->column = column;
1313 raidwrite_component_label(
1314 raidPtr->Disks[column].dev,
1315 raidPtr->raid_cinfo[column].ci_vp,
1316 ci_label );
1317 }
1318 }
1319 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1320
1321 return (retcode);
1322 case RAIDFRAME_SET_AUTOCONFIG:
1323 d = rf_set_autoconfig(raidPtr, *(int *) data);
1324 printf("raid%d: New autoconfig value is: %d\n",
1325 raidPtr->raidid, d);
1326 *(int *) data = d;
1327 return (retcode);
1328
1329 case RAIDFRAME_SET_ROOT:
1330 d = rf_set_rootpartition(raidPtr, *(int *) data);
1331 printf("raid%d: New rootpartition value is: %d\n",
1332 raidPtr->raidid, d);
1333 *(int *) data = d;
1334 return (retcode);
1335
1336 /* initialize all parity */
1337 case RAIDFRAME_REWRITEPARITY:
1338
1339 if (raidPtr->Layout.map->faultsTolerated == 0) {
1340 /* Parity for RAID 0 is trivially correct */
1341 raidPtr->parity_good = RF_RAID_CLEAN;
1342 return(0);
1343 }
1344
1345 if (raidPtr->parity_rewrite_in_progress == 1) {
1346 /* Re-write is already in progress! */
1347 return(EINVAL);
1348 }
1349
1350 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1351 rf_RewriteParityThread,
1352 raidPtr,"raid_parity");
1353 return (retcode);
1354
1355
1356 case RAIDFRAME_ADD_HOT_SPARE:
1357 sparePtr = (RF_SingleComponent_t *) data;
1358 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1359 retcode = rf_add_hot_spare(raidPtr, &component);
1360 return(retcode);
1361
1362 case RAIDFRAME_REMOVE_HOT_SPARE:
1363 return(retcode);
1364
1365 case RAIDFRAME_DELETE_COMPONENT:
1366 componentPtr = (RF_SingleComponent_t *)data;
1367 memcpy( &component, componentPtr,
1368 sizeof(RF_SingleComponent_t));
1369 retcode = rf_delete_component(raidPtr, &component);
1370 return(retcode);
1371
1372 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1373 componentPtr = (RF_SingleComponent_t *)data;
1374 memcpy( &component, componentPtr,
1375 sizeof(RF_SingleComponent_t));
1376 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1377 return(retcode);
1378
1379 case RAIDFRAME_REBUILD_IN_PLACE:
1380
1381 if (raidPtr->Layout.map->faultsTolerated == 0) {
1382 /* Can't do this on a RAID 0!! */
1383 return(EINVAL);
1384 }
1385
1386 if (raidPtr->recon_in_progress == 1) {
1387 /* a reconstruct is already in progress! */
1388 return(EINVAL);
1389 }
1390
1391 componentPtr = (RF_SingleComponent_t *) data;
1392 memcpy( &component, componentPtr,
1393 sizeof(RF_SingleComponent_t));
1394 component.row = 0; /* we don't support any more */
1395 column = component.column;
1396
1397 if ((column < 0) || (column >= raidPtr->numCol)) {
1398 return(EINVAL);
1399 }
1400
1401 RF_LOCK_MUTEX(raidPtr->mutex);
1402 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1403 (raidPtr->numFailures > 0)) {
1404 /* XXX 0 above shouldn't be constant!!! */
1405 /* some component other than this has failed.
1406 Let's not make things worse than they already
1407 are... */
1408 printf("raid%d: Unable to reconstruct to disk at:\n",
1409 raidPtr->raidid);
1410 printf("raid%d: Col: %d Too many failures.\n",
1411 raidPtr->raidid, column);
1412 RF_UNLOCK_MUTEX(raidPtr->mutex);
1413 return (EINVAL);
1414 }
1415 if (raidPtr->Disks[column].status ==
1416 rf_ds_reconstructing) {
1417 printf("raid%d: Unable to reconstruct to disk at:\n",
1418 raidPtr->raidid);
1419 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1420
1421 RF_UNLOCK_MUTEX(raidPtr->mutex);
1422 return (EINVAL);
1423 }
1424 if (raidPtr->Disks[column].status == rf_ds_spared) {
1425 RF_UNLOCK_MUTEX(raidPtr->mutex);
1426 return (EINVAL);
1427 }
1428 RF_UNLOCK_MUTEX(raidPtr->mutex);
1429
1430 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1431 if (rrcopy == NULL)
1432 return(ENOMEM);
1433
1434 rrcopy->raidPtr = (void *) raidPtr;
1435 rrcopy->col = column;
1436
1437 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1438 rf_ReconstructInPlaceThread,
1439 rrcopy,"raid_reconip");
1440 return(retcode);
1441
1442 case RAIDFRAME_GET_INFO:
1443 if (!raidPtr->valid)
1444 return (ENODEV);
1445 ucfgp = (RF_DeviceConfig_t **) data;
1446 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1447 (RF_DeviceConfig_t *));
1448 if (d_cfg == NULL)
1449 return (ENOMEM);
1450 d_cfg->rows = 1; /* there is only 1 row now */
1451 d_cfg->cols = raidPtr->numCol;
1452 d_cfg->ndevs = raidPtr->numCol;
1453 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1454 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1455 return (ENOMEM);
1456 }
1457 d_cfg->nspares = raidPtr->numSpare;
1458 if (d_cfg->nspares >= RF_MAX_DISKS) {
1459 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1460 return (ENOMEM);
1461 }
1462 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1463 d = 0;
1464 for (j = 0; j < d_cfg->cols; j++) {
1465 d_cfg->devs[d] = raidPtr->Disks[j];
1466 d++;
1467 }
1468 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1469 d_cfg->spares[i] = raidPtr->Disks[j];
1470 }
1471 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1472 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1473
1474 return (retcode);
1475
1476 case RAIDFRAME_CHECK_PARITY:
1477 *(int *) data = raidPtr->parity_good;
1478 return (0);
1479
1480 case RAIDFRAME_RESET_ACCTOTALS:
1481 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1482 return (0);
1483
1484 case RAIDFRAME_GET_ACCTOTALS:
1485 totals = (RF_AccTotals_t *) data;
1486 *totals = raidPtr->acc_totals;
1487 return (0);
1488
1489 case RAIDFRAME_KEEP_ACCTOTALS:
1490 raidPtr->keep_acc_totals = *(int *)data;
1491 return (0);
1492
1493 case RAIDFRAME_GET_SIZE:
1494 *(int *) data = raidPtr->totalSectors;
1495 return (0);
1496
1497 /* fail a disk & optionally start reconstruction */
1498 case RAIDFRAME_FAIL_DISK:
1499
1500 if (raidPtr->Layout.map->faultsTolerated == 0) {
1501 /* Can't do this on a RAID 0!! */
1502 return(EINVAL);
1503 }
1504
1505 rr = (struct rf_recon_req *) data;
1506 rr->row = 0;
1507 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1508 return (EINVAL);
1509
1510
1511 RF_LOCK_MUTEX(raidPtr->mutex);
1512 if (raidPtr->status == rf_rs_reconstructing) {
1513 /* you can't fail a disk while we're reconstructing! */
1514 /* XXX wrong for RAID6 */
1515 RF_UNLOCK_MUTEX(raidPtr->mutex);
1516 return (EINVAL);
1517 }
1518 if ((raidPtr->Disks[rr->col].status ==
1519 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1520 /* some other component has failed. Let's not make
1521 things worse. XXX wrong for RAID6 */
1522 RF_UNLOCK_MUTEX(raidPtr->mutex);
1523 return (EINVAL);
1524 }
1525 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1526 /* Can't fail a spared disk! */
1527 RF_UNLOCK_MUTEX(raidPtr->mutex);
1528 return (EINVAL);
1529 }
1530 RF_UNLOCK_MUTEX(raidPtr->mutex);
1531
1532 /* make a copy of the recon request so that we don't rely on
1533 * the user's buffer */
1534 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1535 if (rrcopy == NULL)
1536 return(ENOMEM);
1537 memcpy(rrcopy, rr, sizeof(*rr));
1538 rrcopy->raidPtr = (void *) raidPtr;
1539
1540 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1541 rf_ReconThread,
1542 rrcopy,"raid_recon");
1543 return (0);
1544
1545 /* invoke a copyback operation after recon on whatever disk
1546 * needs it, if any */
1547 case RAIDFRAME_COPYBACK:
1548
1549 if (raidPtr->Layout.map->faultsTolerated == 0) {
1550 /* This makes no sense on a RAID 0!! */
1551 return(EINVAL);
1552 }
1553
1554 if (raidPtr->copyback_in_progress == 1) {
1555 /* Copyback is already in progress! */
1556 return(EINVAL);
1557 }
1558
1559 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1560 rf_CopybackThread,
1561 raidPtr,"raid_copyback");
1562 return (retcode);
1563
1564 /* return the percentage completion of reconstruction */
1565 case RAIDFRAME_CHECK_RECON_STATUS:
1566 if (raidPtr->Layout.map->faultsTolerated == 0) {
1567 /* This makes no sense on a RAID 0, so tell the
1568 user it's done. */
1569 *(int *) data = 100;
1570 return(0);
1571 }
1572 if (raidPtr->status != rf_rs_reconstructing)
1573 *(int *) data = 100;
1574 else {
1575 if (raidPtr->reconControl->numRUsTotal > 0) {
1576 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1577 } else {
1578 *(int *) data = 0;
1579 }
1580 }
1581 return (0);
1582 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1583 progressInfoPtr = (RF_ProgressInfo_t **) data;
1584 if (raidPtr->status != rf_rs_reconstructing) {
1585 progressInfo.remaining = 0;
1586 progressInfo.completed = 100;
1587 progressInfo.total = 100;
1588 } else {
1589 progressInfo.total =
1590 raidPtr->reconControl->numRUsTotal;
1591 progressInfo.completed =
1592 raidPtr->reconControl->numRUsComplete;
1593 progressInfo.remaining = progressInfo.total -
1594 progressInfo.completed;
1595 }
1596 retcode = copyout(&progressInfo, *progressInfoPtr,
1597 sizeof(RF_ProgressInfo_t));
1598 return (retcode);
1599
1600 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1601 if (raidPtr->Layout.map->faultsTolerated == 0) {
1602 /* This makes no sense on a RAID 0, so tell the
1603 user it's done. */
1604 *(int *) data = 100;
1605 return(0);
1606 }
1607 if (raidPtr->parity_rewrite_in_progress == 1) {
1608 *(int *) data = 100 *
1609 raidPtr->parity_rewrite_stripes_done /
1610 raidPtr->Layout.numStripe;
1611 } else {
1612 *(int *) data = 100;
1613 }
1614 return (0);
1615
1616 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1617 progressInfoPtr = (RF_ProgressInfo_t **) data;
1618 if (raidPtr->parity_rewrite_in_progress == 1) {
1619 progressInfo.total = raidPtr->Layout.numStripe;
1620 progressInfo.completed =
1621 raidPtr->parity_rewrite_stripes_done;
1622 progressInfo.remaining = progressInfo.total -
1623 progressInfo.completed;
1624 } else {
1625 progressInfo.remaining = 0;
1626 progressInfo.completed = 100;
1627 progressInfo.total = 100;
1628 }
1629 retcode = copyout(&progressInfo, *progressInfoPtr,
1630 sizeof(RF_ProgressInfo_t));
1631 return (retcode);
1632
1633 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1634 if (raidPtr->Layout.map->faultsTolerated == 0) {
1635 /* This makes no sense on a RAID 0 */
1636 *(int *) data = 100;
1637 return(0);
1638 }
1639 if (raidPtr->copyback_in_progress == 1) {
1640 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1641 raidPtr->Layout.numStripe;
1642 } else {
1643 *(int *) data = 100;
1644 }
1645 return (0);
1646
1647 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1648 progressInfoPtr = (RF_ProgressInfo_t **) data;
1649 if (raidPtr->copyback_in_progress == 1) {
1650 progressInfo.total = raidPtr->Layout.numStripe;
1651 progressInfo.completed =
1652 raidPtr->copyback_stripes_done;
1653 progressInfo.remaining = progressInfo.total -
1654 progressInfo.completed;
1655 } else {
1656 progressInfo.remaining = 0;
1657 progressInfo.completed = 100;
1658 progressInfo.total = 100;
1659 }
1660 retcode = copyout(&progressInfo, *progressInfoPtr,
1661 sizeof(RF_ProgressInfo_t));
1662 return (retcode);
1663
1664 /* the sparetable daemon calls this to wait for the kernel to
1665 * need a spare table. this ioctl does not return until a
1666 * spare table is needed. XXX -- calling mpsleep here in the
1667 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1668 * -- I should either compute the spare table in the kernel,
1669 * or have a different -- XXX XXX -- interface (a different
1670 * character device) for delivering the table -- XXX */
1671 #if 0
1672 case RAIDFRAME_SPARET_WAIT:
1673 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1674 while (!rf_sparet_wait_queue)
1675 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1676 waitreq = rf_sparet_wait_queue;
1677 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1678 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1679
1680 /* structure assignment */
1681 *((RF_SparetWait_t *) data) = *waitreq;
1682
1683 RF_Free(waitreq, sizeof(*waitreq));
1684 return (0);
1685
1686 /* wakes up a process waiting on SPARET_WAIT and puts an error
1687 * code in it that will cause the dameon to exit */
1688 case RAIDFRAME_ABORT_SPARET_WAIT:
1689 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1690 waitreq->fcol = -1;
1691 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1692 waitreq->next = rf_sparet_wait_queue;
1693 rf_sparet_wait_queue = waitreq;
1694 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1695 wakeup(&rf_sparet_wait_queue);
1696 return (0);
1697
1698 /* used by the spare table daemon to deliver a spare table
1699 * into the kernel */
1700 case RAIDFRAME_SEND_SPARET:
1701
1702 /* install the spare table */
1703 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1704
1705 /* respond to the requestor. the return status of the spare
1706 * table installation is passed in the "fcol" field */
1707 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1708 waitreq->fcol = retcode;
1709 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1710 waitreq->next = rf_sparet_resp_queue;
1711 rf_sparet_resp_queue = waitreq;
1712 wakeup(&rf_sparet_resp_queue);
1713 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1714
1715 return (retcode);
1716 #endif
1717
1718 default:
1719 break; /* fall through to the os-specific code below */
1720
1721 }
1722
1723 if (!raidPtr->valid)
1724 return (EINVAL);
1725
1726 /*
1727 * Add support for "regular" device ioctls here.
1728 */
1729
1730 switch (cmd) {
1731 case DIOCGDINFO:
1732 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1733 break;
1734 #ifdef __HAVE_OLD_DISKLABEL
1735 case ODIOCGDINFO:
1736 newlabel = *(rs->sc_dkdev.dk_label);
1737 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1738 return ENOTTY;
1739 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1740 break;
1741 #endif
1742
1743 case DIOCGPART:
1744 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1745 ((struct partinfo *) data)->part =
1746 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1747 break;
1748
1749 case DIOCWDINFO:
1750 case DIOCSDINFO:
1751 #ifdef __HAVE_OLD_DISKLABEL
1752 case ODIOCWDINFO:
1753 case ODIOCSDINFO:
1754 #endif
1755 {
1756 struct disklabel *lp;
1757 #ifdef __HAVE_OLD_DISKLABEL
1758 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1759 memset(&newlabel, 0, sizeof newlabel);
1760 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1761 lp = &newlabel;
1762 } else
1763 #endif
1764 lp = (struct disklabel *)data;
1765
1766 if ((error = raidlock(rs)) != 0)
1767 return (error);
1768
1769 rs->sc_flags |= RAIDF_LABELLING;
1770
1771 error = setdisklabel(rs->sc_dkdev.dk_label,
1772 lp, 0, rs->sc_dkdev.dk_cpulabel);
1773 if (error == 0) {
1774 if (cmd == DIOCWDINFO
1775 #ifdef __HAVE_OLD_DISKLABEL
1776 || cmd == ODIOCWDINFO
1777 #endif
1778 )
1779 error = writedisklabel(RAIDLABELDEV(dev),
1780 raidstrategy, rs->sc_dkdev.dk_label,
1781 rs->sc_dkdev.dk_cpulabel);
1782 }
1783 rs->sc_flags &= ~RAIDF_LABELLING;
1784
1785 raidunlock(rs);
1786
1787 if (error)
1788 return (error);
1789 break;
1790 }
1791
1792 case DIOCWLABEL:
1793 if (*(int *) data != 0)
1794 rs->sc_flags |= RAIDF_WLABEL;
1795 else
1796 rs->sc_flags &= ~RAIDF_WLABEL;
1797 break;
1798
1799 case DIOCGDEFLABEL:
1800 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1801 break;
1802
1803 #ifdef __HAVE_OLD_DISKLABEL
1804 case ODIOCGDEFLABEL:
1805 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1806 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1807 return ENOTTY;
1808 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1809 break;
1810 #endif
1811
1812 case DIOCAWEDGE:
1813 case DIOCDWEDGE:
1814 dkw = (void *)data;
1815
1816 /* If the ioctl happens here, the parent is us. */
1817 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1818 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1819
1820 case DIOCLWEDGES:
1821 return dkwedge_list(&rs->sc_dkdev,
1822 (struct dkwedge_list *)data, l);
1823
1824 default:
1825 retcode = ENOTTY;
1826 }
1827 return (retcode);
1828
1829 }
1830
1831
1832 /* raidinit -- complete the rest of the initialization for the
1833 RAIDframe device. */
1834
1835
1836 static void
1837 raidinit(RF_Raid_t *raidPtr)
1838 {
1839 struct cfdata *cf;
1840 struct raid_softc *rs;
1841 int unit;
1842
1843 unit = raidPtr->raidid;
1844
1845 rs = &raid_softc[unit];
1846
1847 /* XXX should check return code first... */
1848 rs->sc_flags |= RAIDF_INITED;
1849
1850 /* XXX doesn't check bounds. */
1851 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1852
1853 rs->sc_dkdev.dk_name = rs->sc_xname;
1854
1855 /* attach the pseudo device */
1856 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1857 cf->cf_name = raid_cd.cd_name;
1858 cf->cf_atname = raid_cd.cd_name;
1859 cf->cf_unit = unit;
1860 cf->cf_fstate = FSTATE_STAR;
1861
1862 rs->sc_dev = config_attach_pseudo(cf);
1863
1864 if (rs->sc_dev==NULL) {
1865 printf("raid%d: config_attach_pseudo failed\n",
1866 raidPtr->raidid);
1867 }
1868
1869 /* disk_attach actually creates space for the CPU disklabel, among
1870 * other things, so it's critical to call this *BEFORE* we try putzing
1871 * with disklabels. */
1872
1873 disk_attach(&rs->sc_dkdev);
1874
1875 /* XXX There may be a weird interaction here between this, and
1876 * protectedSectors, as used in RAIDframe. */
1877
1878 rs->sc_size = raidPtr->totalSectors;
1879 }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * Wake up the spare-table daemon, ask it for a spare table and wait for
 * the answer.
 *
 * The request is pushed onto the head of rf_sparet_wait_queue and any
 * sleeper on that queue is woken.  The caller then sleeps until an entry
 * appears on rf_sparet_resp_queue, pops the first one, and returns its
 * fcol field as the result code.  The popped response entry is freed
 * here; it is not the request that was passed in.
 *
 * XXX The entries in the queues should be tagged with the raidPtr so
 * that, in the extremely rare case that two recons happen at once, we
 * know for which device we're requesting a spare table.
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	RF_SparetWait_t *resp;
	int status;

	/* Queue the request and poke the daemon. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): this spot used to carry the remark "mpsleep unlocks
	 * the mutex", but the call below is tsleep(), which is not handed
	 * the mutex -- confirm the locking before this code is revived.
	 */
	while (rf_sparet_resp_queue == NULL) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* Take the first response off the queue. */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	status = resp->fcol;
	/* this is not the same entry as the request that was passed in */
	RF_Free(resp, sizeof(*resp));
	return (status);
}
#endif
1915
1916 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1917 * bp & passes it down.
1918 * any calls originating in the kernel must use non-blocking I/O
1919 * do some extra sanity checking to return "appropriate" error values for
1920 * certain conditions (to make some standard utilities work)
1921 *
1922 * Formerly known as: rf_DoAccessKernel
1923 */
1924 void
1925 raidstart(RF_Raid_t *raidPtr)
1926 {
1927 RF_SectorCount_t num_blocks, pb, sum;
1928 RF_RaidAddr_t raid_addr;
1929 struct partition *pp;
1930 daddr_t blocknum;
1931 int unit;
1932 struct raid_softc *rs;
1933 int do_async;
1934 struct buf *bp;
1935 int rc;
1936
1937 unit = raidPtr->raidid;
1938 rs = &raid_softc[unit];
1939
1940 /* quick check to see if anything has died recently */
1941 RF_LOCK_MUTEX(raidPtr->mutex);
1942 if (raidPtr->numNewFailures > 0) {
1943 RF_UNLOCK_MUTEX(raidPtr->mutex);
1944 rf_update_component_labels(raidPtr,
1945 RF_NORMAL_COMPONENT_UPDATE);
1946 RF_LOCK_MUTEX(raidPtr->mutex);
1947 raidPtr->numNewFailures--;
1948 }
1949
1950 /* Check to see if we're at the limit... */
1951 while (raidPtr->openings > 0) {
1952 RF_UNLOCK_MUTEX(raidPtr->mutex);
1953
1954 /* get the next item, if any, from the queue */
1955 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
1956 /* nothing more to do */
1957 return;
1958 }
1959
1960 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1961 * partition.. Need to make it absolute to the underlying
1962 * device.. */
1963
1964 blocknum = bp->b_blkno;
1965 if (DISKPART(bp->b_dev) != RAW_PART) {
1966 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1967 blocknum += pp->p_offset;
1968 }
1969
1970 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1971 (int) blocknum));
1972
1973 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1974 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1975
1976 /* *THIS* is where we adjust what block we're going to...
1977 * but DO NOT TOUCH bp->b_blkno!!! */
1978 raid_addr = blocknum;
1979
1980 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1981 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1982 sum = raid_addr + num_blocks + pb;
1983 if (1 || rf_debugKernelAccess) {
1984 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1985 (int) raid_addr, (int) sum, (int) num_blocks,
1986 (int) pb, (int) bp->b_resid));
1987 }
1988 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1989 || (sum < num_blocks) || (sum < pb)) {
1990 bp->b_error = ENOSPC;
1991 bp->b_resid = bp->b_bcount;
1992 biodone(bp);
1993 RF_LOCK_MUTEX(raidPtr->mutex);
1994 continue;
1995 }
1996 /*
1997 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1998 */
1999
2000 if (bp->b_bcount & raidPtr->sectorMask) {
2001 bp->b_error = EINVAL;
2002 bp->b_resid = bp->b_bcount;
2003 biodone(bp);
2004 RF_LOCK_MUTEX(raidPtr->mutex);
2005 continue;
2006
2007 }
2008 db1_printf(("Calling DoAccess..\n"));
2009
2010
2011 RF_LOCK_MUTEX(raidPtr->mutex);
2012 raidPtr->openings--;
2013 RF_UNLOCK_MUTEX(raidPtr->mutex);
2014
2015 /*
2016 * Everything is async.
2017 */
2018 do_async = 1;
2019
2020 disk_busy(&rs->sc_dkdev);
2021
2022 /* XXX we're still at splbio() here... do we *really*
2023 need to be? */
2024
2025 /* don't ever condition on bp->b_flags & B_WRITE.
2026 * always condition on B_READ instead */
2027
2028 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2029 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2030 do_async, raid_addr, num_blocks,
2031 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2032
2033 if (rc) {
2034 bp->b_error = rc;
2035 bp->b_resid = bp->b_bcount;
2036 biodone(bp);
2037 /* continue loop */
2038 }
2039
2040 RF_LOCK_MUTEX(raidPtr->mutex);
2041 }
2042 RF_UNLOCK_MUTEX(raidPtr->mutex);
2043 }
2044
2045
2046
2047
2048 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2049
2050 int
2051 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2052 {
2053 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2054 struct buf *bp;
2055
2056 req->queue = queue;
2057
2058 #if DIAGNOSTIC
2059 if (queue->raidPtr->raidid >= numraid) {
2060 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
2061 numraid);
2062 panic("Invalid Unit number in rf_DispatchKernelIO");
2063 }
2064 #endif
2065
2066 bp = req->bp;
2067
2068 switch (req->type) {
2069 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2070 /* XXX need to do something extra here.. */
2071 /* I'm leaving this in, as I've never actually seen it used,
2072 * and I'd like folks to report it... GO */
2073 printf(("WAKEUP CALLED\n"));
2074 queue->numOutstanding++;
2075
2076 bp->b_flags = 0;
2077 bp->b_private = req;
2078
2079 KernelWakeupFunc(bp);
2080 break;
2081
2082 case RF_IO_TYPE_READ:
2083 case RF_IO_TYPE_WRITE:
2084 #if RF_ACC_TRACE > 0
2085 if (req->tracerec) {
2086 RF_ETIMER_START(req->tracerec->timer);
2087 }
2088 #endif
2089 InitBP(bp, queue->rf_cinfo->ci_vp,
2090 op, queue->rf_cinfo->ci_dev,
2091 req->sectorOffset, req->numSector,
2092 req->buf, KernelWakeupFunc, (void *) req,
2093 queue->raidPtr->logBytesPerSector, req->b_proc);
2094
2095 if (rf_debugKernelAccess) {
2096 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2097 (long) bp->b_blkno));
2098 }
2099 queue->numOutstanding++;
2100 queue->last_deq_sector = req->sectorOffset;
2101 /* acc wouldn't have been let in if there were any pending
2102 * reqs at any other priority */
2103 queue->curPriority = req->priority;
2104
2105 db1_printf(("Going for %c to unit %d col %d\n",
2106 req->type, queue->raidPtr->raidid,
2107 queue->col));
2108 db1_printf(("sector %d count %d (%d bytes) %d\n",
2109 (int) req->sectorOffset, (int) req->numSector,
2110 (int) (req->numSector <<
2111 queue->raidPtr->logBytesPerSector),
2112 (int) queue->raidPtr->logBytesPerSector));
2113 VOP_STRATEGY(bp->b_vp, bp);
2114
2115 break;
2116
2117 default:
2118 panic("bad req->type in rf_DispatchKernelIO");
2119 }
2120 db1_printf(("Exiting from DispatchKernelIO\n"));
2121
2122 return (0);
2123 }
2124 /* this is the callback function associated with a I/O invoked from
2125 kernel code.
2126 */
2127 static void
2128 KernelWakeupFunc(struct buf *bp)
2129 {
2130 RF_DiskQueueData_t *req = NULL;
2131 RF_DiskQueue_t *queue;
2132 int s;
2133
2134 s = splbio();
2135 db1_printf(("recovering the request queue:\n"));
2136 req = bp->b_private;
2137
2138 queue = (RF_DiskQueue_t *) req->queue;
2139
2140 #if RF_ACC_TRACE > 0
2141 if (req->tracerec) {
2142 RF_ETIMER_STOP(req->tracerec->timer);
2143 RF_ETIMER_EVAL(req->tracerec->timer);
2144 RF_LOCK_MUTEX(rf_tracing_mutex);
2145 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2146 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2147 req->tracerec->num_phys_ios++;
2148 RF_UNLOCK_MUTEX(rf_tracing_mutex);
2149 }
2150 #endif
2151
2152 /* XXX Ok, let's get aggressive... If b_error is set, let's go
2153 * ballistic, and mark the component as hosed... */
2154
2155 if (bp->b_error != 0) {
2156 /* Mark the disk as dead */
2157 /* but only mark it once... */
2158 /* and only if it wouldn't leave this RAID set
2159 completely broken */
2160 if (((queue->raidPtr->Disks[queue->col].status ==
2161 rf_ds_optimal) ||
2162 (queue->raidPtr->Disks[queue->col].status ==
2163 rf_ds_used_spare)) &&
2164 (queue->raidPtr->numFailures <
2165 queue->raidPtr->Layout.map->faultsTolerated)) {
2166 printf("raid%d: IO Error. Marking %s as failed.\n",
2167 queue->raidPtr->raidid,
2168 queue->raidPtr->Disks[queue->col].devname);
2169 queue->raidPtr->Disks[queue->col].status =
2170 rf_ds_failed;
2171 queue->raidPtr->status = rf_rs_degraded;
2172 queue->raidPtr->numFailures++;
2173 queue->raidPtr->numNewFailures++;
2174 } else { /* Disk is already dead... */
2175 /* printf("Disk already marked as dead!\n"); */
2176 }
2177
2178 }
2179
2180 /* Fill in the error value */
2181
2182 req->error = bp->b_error;
2183
2184 simple_lock(&queue->raidPtr->iodone_lock);
2185
2186 /* Drop this one on the "finished" queue... */
2187 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2188
2189 /* Let the raidio thread know there is work to be done. */
2190 wakeup(&(queue->raidPtr->iodone));
2191
2192 simple_unlock(&queue->raidPtr->iodone_lock);
2193
2194 splx(s);
2195 }
2196
2197
2198
2199 /*
2200 * initialize a buf structure for doing an I/O in the kernel.
2201 */
2202 static void
2203 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2204 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2205 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2206 struct proc *b_proc)
2207 {
2208 /* bp->b_flags = B_PHYS | rw_flag; */
2209 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2210 bp->b_bcount = numSect << logBytesPerSector;
2211 bp->b_bufsize = bp->b_bcount;
2212 bp->b_error = 0;
2213 bp->b_dev = dev;
2214 bp->b_data = bf;
2215 bp->b_blkno = startSect;
2216 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2217 if (bp->b_bcount == 0) {
2218 panic("bp->b_bcount is zero in InitBP!!");
2219 }
2220 bp->b_proc = b_proc;
2221 bp->b_iodone = cbFunc;
2222 bp->b_private = cbArg;
2223 bp->b_vp = b_vp;
2224 if ((bp->b_flags & B_READ) == 0) {
2225 bp->b_vp->v_numoutput++;
2226 }
2227
2228 }
2229
2230 static void
2231 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2232 struct disklabel *lp)
2233 {
2234 memset(lp, 0, sizeof(*lp));
2235
2236 /* fabricate a label... */
2237 lp->d_secperunit = raidPtr->totalSectors;
2238 lp->d_secsize = raidPtr->bytesPerSector;
2239 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2240 lp->d_ntracks = 4 * raidPtr->numCol;
2241 lp->d_ncylinders = raidPtr->totalSectors /
2242 (lp->d_nsectors * lp->d_ntracks);
2243 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2244
2245 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2246 lp->d_type = DTYPE_RAID;
2247 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2248 lp->d_rpm = 3600;
2249 lp->d_interleave = 1;
2250 lp->d_flags = 0;
2251
2252 lp->d_partitions[RAW_PART].p_offset = 0;
2253 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2254 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2255 lp->d_npartitions = RAW_PART + 1;
2256
2257 lp->d_magic = DISKMAGIC;
2258 lp->d_magic2 = DISKMAGIC;
2259 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2260
2261 }
2262 /*
2263 * Read the disklabel from the raid device. If one is not present, fake one
2264 * up.
2265 */
2266 static void
2267 raidgetdisklabel(dev_t dev)
2268 {
2269 int unit = raidunit(dev);
2270 struct raid_softc *rs = &raid_softc[unit];
2271 const char *errstring;
2272 struct disklabel *lp = rs->sc_dkdev.dk_label;
2273 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2274 RF_Raid_t *raidPtr;
2275
2276 db1_printf(("Getting the disklabel...\n"));
2277
2278 memset(clp, 0, sizeof(*clp));
2279
2280 raidPtr = raidPtrs[unit];
2281
2282 raidgetdefaultlabel(raidPtr, rs, lp);
2283
2284 /*
2285 * Call the generic disklabel extraction routine.
2286 */
2287 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2288 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2289 if (errstring)
2290 raidmakedisklabel(rs);
2291 else {
2292 int i;
2293 struct partition *pp;
2294
2295 /*
2296 * Sanity check whether the found disklabel is valid.
2297 *
2298 * This is necessary since total size of the raid device
2299 * may vary when an interleave is changed even though exactly
2300 * same components are used, and old disklabel may used
2301 * if that is found.
2302 */
2303 if (lp->d_secperunit != rs->sc_size)
2304 printf("raid%d: WARNING: %s: "
2305 "total sector size in disklabel (%d) != "
2306 "the size of raid (%ld)\n", unit, rs->sc_xname,
2307 lp->d_secperunit, (long) rs->sc_size);
2308 for (i = 0; i < lp->d_npartitions; i++) {
2309 pp = &lp->d_partitions[i];
2310 if (pp->p_offset + pp->p_size > rs->sc_size)
2311 printf("raid%d: WARNING: %s: end of partition `%c' "
2312 "exceeds the size of raid (%ld)\n",
2313 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2314 }
2315 }
2316
2317 }
2318 /*
2319 * Take care of things one might want to take care of in the event
2320 * that a disklabel isn't present.
2321 */
2322 static void
2323 raidmakedisklabel(struct raid_softc *rs)
2324 {
2325 struct disklabel *lp = rs->sc_dkdev.dk_label;
2326 db1_printf(("Making a label..\n"));
2327
2328 /*
2329 * For historical reasons, if there's no disklabel present
2330 * the raw partition must be marked FS_BSDFFS.
2331 */
2332
2333 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2334
2335 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2336
2337 lp->d_checksum = dkcksum(lp);
2338 }
2339 /*
2340 * Wait interruptibly for an exclusive lock.
2341 *
2342 * XXX
2343 * Several drivers do this; it should be abstracted and made MP-safe.
2344 * (Hmm... where have we seen this warning before :-> GO )
2345 */
2346 static int
2347 raidlock(struct raid_softc *rs)
2348 {
2349 int error;
2350
2351 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2352 rs->sc_flags |= RAIDF_WANTED;
2353 if ((error =
2354 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2355 return (error);
2356 }
2357 rs->sc_flags |= RAIDF_LOCKED;
2358 return (0);
2359 }
2360 /*
2361 * Unlock and wake up any waiters.
2362 */
2363 static void
2364 raidunlock(struct raid_softc *rs)
2365 {
2366
2367 rs->sc_flags &= ~RAIDF_LOCKED;
2368 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2369 rs->sc_flags &= ~RAIDF_WANTED;
2370 wakeup(rs);
2371 }
2372 }
2373
2374
/*
 * Placement of the component label on each component, as used by
 * raidread_component_label() and raidwrite_component_label(): the byte
 * offset from the start of the component and the number of bytes
 * transferred.
 */
#define RF_COMPONENT_INFO_OFFSET	16384	/* bytes */
#define RF_COMPONENT_INFO_SIZE		1024	/* bytes */
2377
2378 int
2379 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2380 {
2381 RF_ComponentLabel_t clabel;
2382 raidread_component_label(dev, b_vp, &clabel);
2383 clabel.mod_counter = mod_counter;
2384 clabel.clean = RF_RAID_CLEAN;
2385 raidwrite_component_label(dev, b_vp, &clabel);
2386 return(0);
2387 }
2388
2389
2390 int
2391 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2392 {
2393 RF_ComponentLabel_t clabel;
2394 raidread_component_label(dev, b_vp, &clabel);
2395 clabel.mod_counter = mod_counter;
2396 clabel.clean = RF_RAID_DIRTY;
2397 raidwrite_component_label(dev, b_vp, &clabel);
2398 return(0);
2399 }
2400
2401 /* ARGSUSED */
2402 int
2403 raidread_component_label(dev_t dev, struct vnode *b_vp,
2404 RF_ComponentLabel_t *clabel)
2405 {
2406 struct buf *bp;
2407 const struct bdevsw *bdev;
2408 int error;
2409
2410 /* XXX should probably ensure that we don't try to do this if
2411 someone has changed rf_protected_sectors. */
2412
2413 if (b_vp == NULL) {
2414 /* For whatever reason, this component is not valid.
2415 Don't try to read a component label from it. */
2416 return(EINVAL);
2417 }
2418
2419 /* get a block of the appropriate size... */
2420 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2421 bp->b_dev = dev;
2422
2423 /* get our ducks in a row for the read */
2424 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2425 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2426 bp->b_flags |= B_READ;
2427 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2428
2429 bdev = bdevsw_lookup(bp->b_dev);
2430 if (bdev == NULL)
2431 return (ENXIO);
2432 (*bdev->d_strategy)(bp);
2433
2434 error = biowait(bp);
2435
2436 if (!error) {
2437 memcpy(clabel, bp->b_data,
2438 sizeof(RF_ComponentLabel_t));
2439 }
2440
2441 brelse(bp);
2442 return(error);
2443 }
2444 /* ARGSUSED */
2445 int
2446 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2447 RF_ComponentLabel_t *clabel)
2448 {
2449 struct buf *bp;
2450 const struct bdevsw *bdev;
2451 int error;
2452
2453 /* get a block of the appropriate size... */
2454 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2455 bp->b_dev = dev;
2456
2457 /* get our ducks in a row for the write */
2458 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2459 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2460 bp->b_flags |= B_WRITE;
2461 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2462
2463 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2464
2465 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2466
2467 bdev = bdevsw_lookup(bp->b_dev);
2468 if (bdev == NULL)
2469 return (ENXIO);
2470 (*bdev->d_strategy)(bp);
2471 error = biowait(bp);
2472 brelse(bp);
2473 if (error) {
2474 #if 1
2475 printf("Failed to write RAID component info!\n");
2476 #endif
2477 }
2478
2479 return(error);
2480 }
2481
2482 void
2483 rf_markalldirty(RF_Raid_t *raidPtr)
2484 {
2485 RF_ComponentLabel_t clabel;
2486 int sparecol;
2487 int c;
2488 int j;
2489 int scol = -1;
2490
2491 raidPtr->mod_counter++;
2492 for (c = 0; c < raidPtr->numCol; c++) {
2493 /* we don't want to touch (at all) a disk that has
2494 failed */
2495 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2496 raidread_component_label(
2497 raidPtr->Disks[c].dev,
2498 raidPtr->raid_cinfo[c].ci_vp,
2499 &clabel);
2500 if (clabel.status == rf_ds_spared) {
2501 /* XXX do something special...
2502 but whatever you do, don't
2503 try to access it!! */
2504 } else {
2505 raidmarkdirty(
2506 raidPtr->Disks[c].dev,
2507 raidPtr->raid_cinfo[c].ci_vp,
2508 raidPtr->mod_counter);
2509 }
2510 }
2511 }
2512
2513 for( c = 0; c < raidPtr->numSpare ; c++) {
2514 sparecol = raidPtr->numCol + c;
2515 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2516 /*
2517
2518 we claim this disk is "optimal" if it's
2519 rf_ds_used_spare, as that means it should be
2520 directly substitutable for the disk it replaced.
2521 We note that too...
2522
2523 */
2524
2525 for(j=0;j<raidPtr->numCol;j++) {
2526 if (raidPtr->Disks[j].spareCol == sparecol) {
2527 scol = j;
2528 break;
2529 }
2530 }
2531
2532 raidread_component_label(
2533 raidPtr->Disks[sparecol].dev,
2534 raidPtr->raid_cinfo[sparecol].ci_vp,
2535 &clabel);
2536 /* make sure status is noted */
2537
2538 raid_init_component_label(raidPtr, &clabel);
2539
2540 clabel.row = 0;
2541 clabel.column = scol;
2542 /* Note: we *don't* change status from rf_ds_used_spare
2543 to rf_ds_optimal */
2544 /* clabel.status = rf_ds_optimal; */
2545
2546 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2547 raidPtr->raid_cinfo[sparecol].ci_vp,
2548 raidPtr->mod_counter);
2549 }
2550 }
2551 }
2552
2553
2554 void
2555 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2556 {
2557 RF_ComponentLabel_t clabel;
2558 int sparecol;
2559 int c;
2560 int j;
2561 int scol;
2562
2563 scol = -1;
2564
2565 /* XXX should do extra checks to make sure things really are clean,
2566 rather than blindly setting the clean bit... */
2567
2568 raidPtr->mod_counter++;
2569
2570 for (c = 0; c < raidPtr->numCol; c++) {
2571 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2572 raidread_component_label(
2573 raidPtr->Disks[c].dev,
2574 raidPtr->raid_cinfo[c].ci_vp,
2575 &clabel);
2576 /* make sure status is noted */
2577 clabel.status = rf_ds_optimal;
2578
2579 /* bump the counter */
2580 clabel.mod_counter = raidPtr->mod_counter;
2581
2582 /* note what unit we are configured as */
2583 clabel.last_unit = raidPtr->raidid;
2584
2585 raidwrite_component_label(
2586 raidPtr->Disks[c].dev,
2587 raidPtr->raid_cinfo[c].ci_vp,
2588 &clabel);
2589 if (final == RF_FINAL_COMPONENT_UPDATE) {
2590 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2591 raidmarkclean(
2592 raidPtr->Disks[c].dev,
2593 raidPtr->raid_cinfo[c].ci_vp,
2594 raidPtr->mod_counter);
2595 }
2596 }
2597 }
2598 /* else we don't touch it.. */
2599 }
2600
2601 for( c = 0; c < raidPtr->numSpare ; c++) {
2602 sparecol = raidPtr->numCol + c;
2603 /* Need to ensure that the reconstruct actually completed! */
2604 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2605 /*
2606
2607 we claim this disk is "optimal" if it's
2608 rf_ds_used_spare, as that means it should be
2609 directly substitutable for the disk it replaced.
2610 We note that too...
2611
2612 */
2613
2614 for(j=0;j<raidPtr->numCol;j++) {
2615 if (raidPtr->Disks[j].spareCol == sparecol) {
2616 scol = j;
2617 break;
2618 }
2619 }
2620
2621 /* XXX shouldn't *really* need this... */
2622 raidread_component_label(
2623 raidPtr->Disks[sparecol].dev,
2624 raidPtr->raid_cinfo[sparecol].ci_vp,
2625 &clabel);
2626 /* make sure status is noted */
2627
2628 raid_init_component_label(raidPtr, &clabel);
2629
2630 clabel.mod_counter = raidPtr->mod_counter;
2631 clabel.column = scol;
2632 clabel.status = rf_ds_optimal;
2633 clabel.last_unit = raidPtr->raidid;
2634
2635 raidwrite_component_label(
2636 raidPtr->Disks[sparecol].dev,
2637 raidPtr->raid_cinfo[sparecol].ci_vp,
2638 &clabel);
2639 if (final == RF_FINAL_COMPONENT_UPDATE) {
2640 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2641 raidmarkclean( raidPtr->Disks[sparecol].dev,
2642 raidPtr->raid_cinfo[sparecol].ci_vp,
2643 raidPtr->mod_counter);
2644 }
2645 }
2646 }
2647 }
2648 }
2649
2650 void
2651 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2652 {
2653 struct lwp *l;
2654
2655 l = curlwp;
2656
2657 if (vp != NULL) {
2658 if (auto_configured == 1) {
2659 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2660 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2661 vput(vp);
2662
2663 } else {
2664 (void) vn_close(vp, FREAD | FWRITE, l->l_cred, l);
2665 }
2666 }
2667 }
2668
2669
2670 void
2671 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2672 {
2673 int r,c;
2674 struct vnode *vp;
2675 int acd;
2676
2677
2678 /* We take this opportunity to close the vnodes like we should.. */
2679
2680 for (c = 0; c < raidPtr->numCol; c++) {
2681 vp = raidPtr->raid_cinfo[c].ci_vp;
2682 acd = raidPtr->Disks[c].auto_configured;
2683 rf_close_component(raidPtr, vp, acd);
2684 raidPtr->raid_cinfo[c].ci_vp = NULL;
2685 raidPtr->Disks[c].auto_configured = 0;
2686 }
2687
2688 for (r = 0; r < raidPtr->numSpare; r++) {
2689 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2690 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2691 rf_close_component(raidPtr, vp, acd);
2692 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2693 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2694 }
2695 }
2696
2697
2698 void
2699 rf_ReconThread(struct rf_recon_req *req)
2700 {
2701 int s;
2702 RF_Raid_t *raidPtr;
2703
2704 s = splbio();
2705 raidPtr = (RF_Raid_t *) req->raidPtr;
2706 raidPtr->recon_in_progress = 1;
2707
2708 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2709 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2710
2711 RF_Free(req, sizeof(*req));
2712
2713 raidPtr->recon_in_progress = 0;
2714 splx(s);
2715
2716 /* That's all... */
2717 kthread_exit(0); /* does not return */
2718 }
2719
2720 void
2721 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2722 {
2723 int retcode;
2724 int s;
2725
2726 raidPtr->parity_rewrite_stripes_done = 0;
2727 raidPtr->parity_rewrite_in_progress = 1;
2728 s = splbio();
2729 retcode = rf_RewriteParity(raidPtr);
2730 splx(s);
2731 if (retcode) {
2732 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2733 } else {
2734 /* set the clean bit! If we shutdown correctly,
2735 the clean bit on each component label will get
2736 set */
2737 raidPtr->parity_good = RF_RAID_CLEAN;
2738 }
2739 raidPtr->parity_rewrite_in_progress = 0;
2740
2741 /* Anyone waiting for us to stop? If so, inform them... */
2742 if (raidPtr->waitShutdown) {
2743 wakeup(&raidPtr->parity_rewrite_in_progress);
2744 }
2745
2746 /* That's all... */
2747 kthread_exit(0); /* does not return */
2748 }
2749
2750
2751 void
2752 rf_CopybackThread(RF_Raid_t *raidPtr)
2753 {
2754 int s;
2755
2756 raidPtr->copyback_in_progress = 1;
2757 s = splbio();
2758 rf_CopybackReconstructedData(raidPtr);
2759 splx(s);
2760 raidPtr->copyback_in_progress = 0;
2761
2762 /* That's all... */
2763 kthread_exit(0); /* does not return */
2764 }
2765
2766
2767 void
2768 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2769 {
2770 int s;
2771 RF_Raid_t *raidPtr;
2772
2773 s = splbio();
2774 raidPtr = req->raidPtr;
2775 raidPtr->recon_in_progress = 1;
2776 rf_ReconstructInPlace(raidPtr, req->col);
2777 RF_Free(req, sizeof(*req));
2778 raidPtr->recon_in_progress = 0;
2779 splx(s);
2780
2781 /* That's all... */
2782 kthread_exit(0); /* does not return */
2783 }
2784
2785 static RF_AutoConfig_t *
2786 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2787 const char *cname, RF_SectorCount_t size)
2788 {
2789 int good_one = 0;
2790 RF_ComponentLabel_t *clabel;
2791 RF_AutoConfig_t *ac;
2792
2793 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
2794 if (clabel == NULL) {
2795 oomem:
2796 while(ac_list) {
2797 ac = ac_list;
2798 if (ac->clabel)
2799 free(ac->clabel, M_RAIDFRAME);
2800 ac_list = ac_list->next;
2801 free(ac, M_RAIDFRAME);
2802 }
2803 printf("RAID auto config: out of memory!\n");
2804 return NULL; /* XXX probably should panic? */
2805 }
2806
2807 if (!raidread_component_label(dev, vp, clabel)) {
2808 /* Got the label. Does it look reasonable? */
2809 if (rf_reasonable_label(clabel) &&
2810 (clabel->partitionSize <= size)) {
2811 #ifdef DEBUG
2812 printf("Component on: %s: %llu\n",
2813 cname, (unsigned long long)size);
2814 rf_print_component_label(clabel);
2815 #endif
2816 /* if it's reasonable, add it, else ignore it. */
2817 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2818 M_NOWAIT);
2819 if (ac == NULL) {
2820 free(clabel, M_RAIDFRAME);
2821 goto oomem;
2822 }
2823 strlcpy(ac->devname, cname, sizeof(ac->devname));
2824 ac->dev = dev;
2825 ac->vp = vp;
2826 ac->clabel = clabel;
2827 ac->next = ac_list;
2828 ac_list = ac;
2829 good_one = 1;
2830 }
2831 }
2832 if (!good_one) {
2833 /* cleanup */
2834 free(clabel, M_RAIDFRAME);
2835 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2836 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2837 vput(vp);
2838 }
2839 return ac_list;
2840 }
2841
2842 RF_AutoConfig_t *
2843 rf_find_raid_components()
2844 {
2845 struct vnode *vp;
2846 struct disklabel label;
2847 struct device *dv;
2848 dev_t dev;
2849 int bmajor, bminor, wedge;
2850 int error;
2851 int i;
2852 RF_AutoConfig_t *ac_list;
2853
2854
2855 /* initialize the AutoConfig list */
2856 ac_list = NULL;
2857
2858 /* we begin by trolling through *all* the devices on the system */
2859
2860 for (dv = alldevs.tqh_first; dv != NULL;
2861 dv = dv->dv_list.tqe_next) {
2862
2863 /* we are only interested in disks... */
2864 if (device_class(dv) != DV_DISK)
2865 continue;
2866
2867 /* we don't care about floppies... */
2868 if (device_is_a(dv, "fd")) {
2869 continue;
2870 }
2871
2872 /* we don't care about CD's... */
2873 if (device_is_a(dv, "cd")) {
2874 continue;
2875 }
2876
2877 /* hdfd is the Atari/Hades floppy driver */
2878 if (device_is_a(dv, "hdfd")) {
2879 continue;
2880 }
2881
2882 /* fdisa is the Atari/Milan floppy driver */
2883 if (device_is_a(dv, "fdisa")) {
2884 continue;
2885 }
2886
2887 /* need to find the device_name_to_block_device_major stuff */
2888 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2889
2890 /* get a vnode for the raw partition of this disk */
2891
2892 wedge = device_is_a(dv, "dk");
2893 bminor = minor(device_unit(dv));
2894 dev = wedge ? makedev(bmajor, bminor) :
2895 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2896 if (bdevvp(dev, &vp))
2897 panic("RAID can't alloc vnode");
2898
2899 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2900
2901 if (error) {
2902 /* "Who cares." Continue looking
2903 for something that exists*/
2904 vput(vp);
2905 continue;
2906 }
2907
2908 if (wedge) {
2909 struct dkwedge_info dkw;
2910 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2911 NOCRED, 0);
2912 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2913 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2914 vput(vp);
2915 if (error) {
2916 printf("RAIDframe: can't get wedge info for "
2917 "dev %s (%d)\n", dv->dv_xname, error);
2918 continue;
2919 }
2920
2921 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
2922 continue;
2923
2924 ac_list = rf_get_component(ac_list, dev, vp,
2925 dv->dv_xname, dkw.dkw_size);
2926 continue;
2927 }
2928
2929 /* Ok, the disk exists. Go get the disklabel. */
2930 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2931 if (error) {
2932 /*
2933 * XXX can't happen - open() would
2934 * have errored out (or faked up one)
2935 */
2936 if (error != ENOTTY)
2937 printf("RAIDframe: can't get label for dev "
2938 "%s (%d)\n", dv->dv_xname, error);
2939 }
2940
2941 /* don't need this any more. We'll allocate it again
2942 a little later if we really do... */
2943 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2944 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2945 vput(vp);
2946
2947 if (error)
2948 continue;
2949
2950 for (i = 0; i < label.d_npartitions; i++) {
2951 char cname[sizeof(ac_list->devname)];
2952
2953 /* We only support partitions marked as RAID */
2954 if (label.d_partitions[i].p_fstype != FS_RAID)
2955 continue;
2956
2957 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2958 if (bdevvp(dev, &vp))
2959 panic("RAID can't alloc vnode");
2960
2961 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2962 if (error) {
2963 /* Whatever... */
2964 vput(vp);
2965 continue;
2966 }
2967 snprintf(cname, sizeof(cname), "%s%c",
2968 dv->dv_xname, 'a' + i);
2969 ac_list = rf_get_component(ac_list, dev, vp, cname,
2970 label.d_partitions[i].p_size);
2971 }
2972 }
2973 return ac_list;
2974 }
2975
2976
2977 static int
2978 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2979 {
2980
2981 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2982 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2983 ((clabel->clean == RF_RAID_CLEAN) ||
2984 (clabel->clean == RF_RAID_DIRTY)) &&
2985 clabel->row >=0 &&
2986 clabel->column >= 0 &&
2987 clabel->num_rows > 0 &&
2988 clabel->num_columns > 0 &&
2989 clabel->row < clabel->num_rows &&
2990 clabel->column < clabel->num_columns &&
2991 clabel->blockSize > 0 &&
2992 clabel->numBlocks > 0) {
2993 /* label looks reasonable enough... */
2994 return(1);
2995 }
2996 return(0);
2997 }
2998
2999
#ifdef DEBUG
/*
 * Debugging aid: dump the interesting fields of a component label to the
 * console.  Only built when DEBUG is defined.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean = clabel->clean ? "Yes" : "No";
	const char *is_autoconf = clabel->autoconfigure ? "Yes" : "No";
	const char *is_root = clabel->root_partition ? "Yes" : "No";

	/* position within the set */
	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* identity and state */
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n", is_clean, clabel->status );

	/* layout */
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* autoconfiguration */
	printf(" Autoconfig: %s\n", is_autoconf );
	printf(" Contains root partition: %s\n", is_root );
	printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
3027
3028 RF_ConfigSet_t *
3029 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3030 {
3031 RF_AutoConfig_t *ac;
3032 RF_ConfigSet_t *config_sets;
3033 RF_ConfigSet_t *cset;
3034 RF_AutoConfig_t *ac_next;
3035
3036
3037 config_sets = NULL;
3038
3039 /* Go through the AutoConfig list, and figure out which components
3040 belong to what sets. */
3041 ac = ac_list;
3042 while(ac!=NULL) {
3043 /* we're going to putz with ac->next, so save it here
3044 for use at the end of the loop */
3045 ac_next = ac->next;
3046
3047 if (config_sets == NULL) {
3048 /* will need at least this one... */
3049 config_sets = (RF_ConfigSet_t *)
3050 malloc(sizeof(RF_ConfigSet_t),
3051 M_RAIDFRAME, M_NOWAIT);
3052 if (config_sets == NULL) {
3053 panic("rf_create_auto_sets: No memory!");
3054 }
3055 /* this one is easy :) */
3056 config_sets->ac = ac;
3057 config_sets->next = NULL;
3058 config_sets->rootable = 0;
3059 ac->next = NULL;
3060 } else {
3061 /* which set does this component fit into? */
3062 cset = config_sets;
3063 while(cset!=NULL) {
3064 if (rf_does_it_fit(cset, ac)) {
3065 /* looks like it matches... */
3066 ac->next = cset->ac;
3067 cset->ac = ac;
3068 break;
3069 }
3070 cset = cset->next;
3071 }
3072 if (cset==NULL) {
3073 /* didn't find a match above... new set..*/
3074 cset = (RF_ConfigSet_t *)
3075 malloc(sizeof(RF_ConfigSet_t),
3076 M_RAIDFRAME, M_NOWAIT);
3077 if (cset == NULL) {
3078 panic("rf_create_auto_sets: No memory!");
3079 }
3080 cset->ac = ac;
3081 ac->next = NULL;
3082 cset->next = config_sets;
3083 cset->rootable = 0;
3084 config_sets = cset;
3085 }
3086 }
3087 ac = ac_next;
3088 }
3089
3090
3091 return(config_sets);
3092 }
3093
3094 static int
3095 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3096 {
3097 RF_ComponentLabel_t *clabel1, *clabel2;
3098
3099 /* If this one matches the *first* one in the set, that's good
3100 enough, since the other members of the set would have been
3101 through here too... */
3102 /* note that we are not checking partitionSize here..
3103
3104 Note that we are also not checking the mod_counters here.
3105 If everything else matches execpt the mod_counter, that's
3106 good enough for this test. We will deal with the mod_counters
3107 a little later in the autoconfiguration process.
3108
3109 (clabel1->mod_counter == clabel2->mod_counter) &&
3110
3111 The reason we don't check for this is that failed disks
3112 will have lower modification counts. If those disks are
3113 not added to the set they used to belong to, then they will
3114 form their own set, which may result in 2 different sets,
3115 for example, competing to be configured at raid0, and
3116 perhaps competing to be the root filesystem set. If the
3117 wrong ones get configured, or both attempt to become /,
3118 weird behaviour and or serious lossage will occur. Thus we
3119 need to bring them into the fold here, and kick them out at
3120 a later point.
3121
3122 */
3123
3124 clabel1 = cset->ac->clabel;
3125 clabel2 = ac->clabel;
3126 if ((clabel1->version == clabel2->version) &&
3127 (clabel1->serial_number == clabel2->serial_number) &&
3128 (clabel1->num_rows == clabel2->num_rows) &&
3129 (clabel1->num_columns == clabel2->num_columns) &&
3130 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3131 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3132 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3133 (clabel1->parityConfig == clabel2->parityConfig) &&
3134 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3135 (clabel1->blockSize == clabel2->blockSize) &&
3136 (clabel1->numBlocks == clabel2->numBlocks) &&
3137 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3138 (clabel1->root_partition == clabel2->root_partition) &&
3139 (clabel1->last_unit == clabel2->last_unit) &&
3140 (clabel1->config_order == clabel2->config_order)) {
3141 /* if it get's here, it almost *has* to be a match */
3142 } else {
3143 /* it's not consistent with somebody in the set..
3144 punt */
3145 return(0);
3146 }
3147 /* all was fine.. it must fit... */
3148 return(1);
3149 }
3150
3151 int
3152 rf_have_enough_components(RF_ConfigSet_t *cset)
3153 {
3154 RF_AutoConfig_t *ac;
3155 RF_AutoConfig_t *auto_config;
3156 RF_ComponentLabel_t *clabel;
3157 int c;
3158 int num_cols;
3159 int num_missing;
3160 int mod_counter;
3161 int mod_counter_found;
3162 int even_pair_failed;
3163 char parity_type;
3164
3165
3166 /* check to see that we have enough 'live' components
3167 of this set. If so, we can configure it if necessary */
3168
3169 num_cols = cset->ac->clabel->num_columns;
3170 parity_type = cset->ac->clabel->parityConfig;
3171
3172 /* XXX Check for duplicate components!?!?!? */
3173
3174 /* Determine what the mod_counter is supposed to be for this set. */
3175
3176 mod_counter_found = 0;
3177 mod_counter = 0;
3178 ac = cset->ac;
3179 while(ac!=NULL) {
3180 if (mod_counter_found==0) {
3181 mod_counter = ac->clabel->mod_counter;
3182 mod_counter_found = 1;
3183 } else {
3184 if (ac->clabel->mod_counter > mod_counter) {
3185 mod_counter = ac->clabel->mod_counter;
3186 }
3187 }
3188 ac = ac->next;
3189 }
3190
3191 num_missing = 0;
3192 auto_config = cset->ac;
3193
3194 even_pair_failed = 0;
3195 for(c=0; c<num_cols; c++) {
3196 ac = auto_config;
3197 while(ac!=NULL) {
3198 if ((ac->clabel->column == c) &&
3199 (ac->clabel->mod_counter == mod_counter)) {
3200 /* it's this one... */
3201 #ifdef DEBUG
3202 printf("Found: %s at %d\n",
3203 ac->devname,c);
3204 #endif
3205 break;
3206 }
3207 ac=ac->next;
3208 }
3209 if (ac==NULL) {
3210 /* Didn't find one here! */
3211 /* special case for RAID 1, especially
3212 where there are more than 2
3213 components (where RAIDframe treats
3214 things a little differently :( ) */
3215 if (parity_type == '1') {
3216 if (c%2 == 0) { /* even component */
3217 even_pair_failed = 1;
3218 } else { /* odd component. If
3219 we're failed, and
3220 so is the even
3221 component, it's
3222 "Good Night, Charlie" */
3223 if (even_pair_failed == 1) {
3224 return(0);
3225 }
3226 }
3227 } else {
3228 /* normal accounting */
3229 num_missing++;
3230 }
3231 }
3232 if ((parity_type == '1') && (c%2 == 1)) {
3233 /* Just did an even component, and we didn't
3234 bail.. reset the even_pair_failed flag,
3235 and go on to the next component.... */
3236 even_pair_failed = 0;
3237 }
3238 }
3239
3240 clabel = cset->ac->clabel;
3241
3242 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3243 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3244 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3245 /* XXX this needs to be made *much* more general */
3246 /* Too many failures */
3247 return(0);
3248 }
3249 /* otherwise, all is well, and we've got enough to take a kick
3250 at autoconfiguring this set */
3251 return(1);
3252 }
3253
3254 void
3255 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3256 RF_Raid_t *raidPtr)
3257 {
3258 RF_ComponentLabel_t *clabel;
3259 int i;
3260
3261 clabel = ac->clabel;
3262
3263 /* 1. Fill in the common stuff */
3264 config->numRow = clabel->num_rows = 1;
3265 config->numCol = clabel->num_columns;
3266 config->numSpare = 0; /* XXX should this be set here? */
3267 config->sectPerSU = clabel->sectPerSU;
3268 config->SUsPerPU = clabel->SUsPerPU;
3269 config->SUsPerRU = clabel->SUsPerRU;
3270 config->parityConfig = clabel->parityConfig;
3271 /* XXX... */
3272 strcpy(config->diskQueueType,"fifo");
3273 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3274 config->layoutSpecificSize = 0; /* XXX ?? */
3275
3276 while(ac!=NULL) {
3277 /* row/col values will be in range due to the checks
3278 in reasonable_label() */
3279 strcpy(config->devnames[0][ac->clabel->column],
3280 ac->devname);
3281 ac = ac->next;
3282 }
3283
3284 for(i=0;i<RF_MAXDBGV;i++) {
3285 config->debugVars[i][0] = 0;
3286 }
3287 }
3288
3289 int
3290 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3291 {
3292 RF_ComponentLabel_t clabel;
3293 struct vnode *vp;
3294 dev_t dev;
3295 int column;
3296 int sparecol;
3297
3298 raidPtr->autoconfigure = new_value;
3299
3300 for(column=0; column<raidPtr->numCol; column++) {
3301 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3302 dev = raidPtr->Disks[column].dev;
3303 vp = raidPtr->raid_cinfo[column].ci_vp;
3304 raidread_component_label(dev, vp, &clabel);
3305 clabel.autoconfigure = new_value;
3306 raidwrite_component_label(dev, vp, &clabel);
3307 }
3308 }
3309 for(column = 0; column < raidPtr->numSpare ; column++) {
3310 sparecol = raidPtr->numCol + column;
3311 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3312 dev = raidPtr->Disks[sparecol].dev;
3313 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3314 raidread_component_label(dev, vp, &clabel);
3315 clabel.autoconfigure = new_value;
3316 raidwrite_component_label(dev, vp, &clabel);
3317 }
3318 }
3319 return(new_value);
3320 }
3321
3322 int
3323 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3324 {
3325 RF_ComponentLabel_t clabel;
3326 struct vnode *vp;
3327 dev_t dev;
3328 int column;
3329 int sparecol;
3330
3331 raidPtr->root_partition = new_value;
3332 for(column=0; column<raidPtr->numCol; column++) {
3333 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3334 dev = raidPtr->Disks[column].dev;
3335 vp = raidPtr->raid_cinfo[column].ci_vp;
3336 raidread_component_label(dev, vp, &clabel);
3337 clabel.root_partition = new_value;
3338 raidwrite_component_label(dev, vp, &clabel);
3339 }
3340 }
3341 for(column = 0; column < raidPtr->numSpare ; column++) {
3342 sparecol = raidPtr->numCol + column;
3343 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3344 dev = raidPtr->Disks[sparecol].dev;
3345 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3346 raidread_component_label(dev, vp, &clabel);
3347 clabel.root_partition = new_value;
3348 raidwrite_component_label(dev, vp, &clabel);
3349 }
3350 }
3351 return(new_value);
3352 }
3353
3354 void
3355 rf_release_all_vps(RF_ConfigSet_t *cset)
3356 {
3357 RF_AutoConfig_t *ac;
3358
3359 ac = cset->ac;
3360 while(ac!=NULL) {
3361 /* Close the vp, and give it back */
3362 if (ac->vp) {
3363 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3364 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3365 vput(ac->vp);
3366 ac->vp = NULL;
3367 }
3368 ac = ac->next;
3369 }
3370 }
3371
3372
3373 void
3374 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3375 {
3376 RF_AutoConfig_t *ac;
3377 RF_AutoConfig_t *next_ac;
3378
3379 ac = cset->ac;
3380 while(ac!=NULL) {
3381 next_ac = ac->next;
3382 /* nuke the label */
3383 free(ac->clabel, M_RAIDFRAME);
3384 /* cleanup the config structure */
3385 free(ac, M_RAIDFRAME);
3386 /* "next.." */
3387 ac = next_ac;
3388 }
3389 /* and, finally, nuke the config set */
3390 free(cset, M_RAIDFRAME);
3391 }
3392
3393
3394 void
3395 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3396 {
3397 /* current version number */
3398 clabel->version = RF_COMPONENT_LABEL_VERSION;
3399 clabel->serial_number = raidPtr->serial_number;
3400 clabel->mod_counter = raidPtr->mod_counter;
3401 clabel->num_rows = 1;
3402 clabel->num_columns = raidPtr->numCol;
3403 clabel->clean = RF_RAID_DIRTY; /* not clean */
3404 clabel->status = rf_ds_optimal; /* "It's good!" */
3405
3406 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3407 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3408 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3409
3410 clabel->blockSize = raidPtr->bytesPerSector;
3411 clabel->numBlocks = raidPtr->sectorsPerDisk;
3412
3413 /* XXX not portable */
3414 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3415 clabel->maxOutstanding = raidPtr->maxOutstanding;
3416 clabel->autoconfigure = raidPtr->autoconfigure;
3417 clabel->root_partition = raidPtr->root_partition;
3418 clabel->last_unit = raidPtr->raidid;
3419 clabel->config_order = raidPtr->config_order;
3420 }
3421
3422 int
3423 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3424 {
3425 RF_Raid_t *raidPtr;
3426 RF_Config_t *config;
3427 int raidID;
3428 int retcode;
3429
3430 #ifdef DEBUG
3431 printf("RAID autoconfigure\n");
3432 #endif
3433
3434 retcode = 0;
3435 *unit = -1;
3436
3437 /* 1. Create a config structure */
3438
3439 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3440 M_RAIDFRAME,
3441 M_NOWAIT);
3442 if (config==NULL) {
3443 printf("Out of mem!?!?\n");
3444 /* XXX do something more intelligent here. */
3445 return(1);
3446 }
3447
3448 memset(config, 0, sizeof(RF_Config_t));
3449
3450 /*
3451 2. Figure out what RAID ID this one is supposed to live at
3452 See if we can get the same RAID dev that it was configured
3453 on last time..
3454 */
3455
3456 raidID = cset->ac->clabel->last_unit;
3457 if ((raidID < 0) || (raidID >= numraid)) {
3458 /* let's not wander off into lala land. */
3459 raidID = numraid - 1;
3460 }
3461 if (raidPtrs[raidID]->valid != 0) {
3462
3463 /*
3464 Nope... Go looking for an alternative...
3465 Start high so we don't immediately use raid0 if that's
3466 not taken.
3467 */
3468
3469 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3470 if (raidPtrs[raidID]->valid == 0) {
3471 /* can use this one! */
3472 break;
3473 }
3474 }
3475 }
3476
3477 if (raidID < 0) {
3478 /* punt... */
3479 printf("Unable to auto configure this set!\n");
3480 printf("(Out of RAID devs!)\n");
3481 free(config, M_RAIDFRAME);
3482 return(1);
3483 }
3484
3485 #ifdef DEBUG
3486 printf("Configuring raid%d:\n",raidID);
3487 #endif
3488
3489 raidPtr = raidPtrs[raidID];
3490
3491 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3492 raidPtr->raidid = raidID;
3493 raidPtr->openings = RAIDOUTSTANDING;
3494
3495 /* 3. Build the configuration structure */
3496 rf_create_configuration(cset->ac, config, raidPtr);
3497
3498 /* 4. Do the configuration */
3499 retcode = rf_Configure(raidPtr, config, cset->ac);
3500
3501 if (retcode == 0) {
3502
3503 raidinit(raidPtrs[raidID]);
3504
3505 rf_markalldirty(raidPtrs[raidID]);
3506 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3507 if (cset->ac->clabel->root_partition==1) {
3508 /* everything configured just fine. Make a note
3509 that this set is eligible to be root. */
3510 cset->rootable = 1;
3511 /* XXX do this here? */
3512 raidPtrs[raidID]->root_partition = 1;
3513 }
3514 }
3515
3516 /* 5. Cleanup */
3517 free(config, M_RAIDFRAME);
3518
3519 *unit = raidID;
3520 return(retcode);
3521 }
3522
3523 void
3524 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3525 {
3526 struct buf *bp;
3527
3528 bp = (struct buf *)desc->bp;
3529 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3530 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3531 }
3532
3533 void
3534 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3535 size_t xmin, size_t xmax)
3536 {
3537 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3538 pool_sethiwat(p, xmax);
3539 pool_prime(p, xmin);
3540 pool_setlowat(p, xmin);
3541 }
3542
3543 /*
3544 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3545 * if there is IO pending and if that IO could possibly be done for a
3546 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3547 * otherwise.
3548 *
3549 */
3550
3551 int
3552 rf_buf_queue_check(int raidid)
3553 {
3554 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3555 raidPtrs[raidid]->openings > 0) {
3556 /* there is work to do */
3557 return 0;
3558 }
3559 /* default is nothing to do */
3560 return 1;
3561 }
3562
3563 int
3564 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3565 {
3566 struct partinfo dpart;
3567 struct dkwedge_info dkw;
3568 int error;
3569
3570 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
3571 if (error == 0) {
3572 diskPtr->blockSize = dpart.disklab->d_secsize;
3573 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
3574 diskPtr->partitionSize = dpart.part->p_size;
3575 return 0;
3576 }
3577
3578 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
3579 if (error == 0) {
3580 diskPtr->blockSize = 512; /* XXX */
3581 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
3582 diskPtr->partitionSize = dkw.dkw_size;
3583 return 0;
3584 }
3585 return error;
3586 }
3587
/*
 * raid_match -- match routine for the raid pseudo-device.
 *
 * None of the arguments are examined; the routine always returns 1.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata, void *aux)
{

	return 1;
}
3594
/*
 * raid_attach -- attach routine for the raid pseudo-device.
 *
 * Intentionally empty: no work is done here and no argument is used.
 */
static void
raid_attach(struct device *parent, struct device *self, void *aux)
{

	/* nothing to do */
}
3601
3602
3603 static int
3604 raid_detach(struct device *self, int flags)
3605 {
3606 struct raid_softc *rs = (struct raid_softc *)self;
3607
3608 if (rs->sc_flags & RAIDF_INITED)
3609 return EBUSY;
3610
3611 return 0;
3612 }
3613
3614
3615