rf_netbsdkintf.c revision 1.234 1 /* $NetBSD: rf_netbsdkintf.c,v 1.234 2007/11/01 04:11:22 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.234 2007/11/01 04:11:22 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <prop/proplib.h>
174
175 #include <dev/raidframe/raidframevar.h>
176 #include <dev/raidframe/raidframeio.h>
177 #include "raid.h"
178 #include "opt_raid_autoconfig.h"
179 #include "rf_raid.h"
180 #include "rf_copyback.h"
181 #include "rf_dag.h"
182 #include "rf_dagflags.h"
183 #include "rf_desc.h"
184 #include "rf_diskqueue.h"
185 #include "rf_etimer.h"
186 #include "rf_general.h"
187 #include "rf_kintf.h"
188 #include "rf_options.h"
189 #include "rf_driver.h"
190 #include "rf_parityscan.h"
191 #include "rf_threadstuff.h"
192
/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a fully parenthesized printf argument list.  With
 * DEBUG defined the message is printed only while rf_kdebug_level > 0;
 * without DEBUG the macro expands to an empty statement.
 *
 * Both forms are wrapped in do { } while (0) so that an invocation
 * followed by ';' is exactly one statement and is safe as the body of
 * an un-braced if/else.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
199
/*
 * Table of per-unit RAID descriptors.  raidattach() allocates one slot
 * per requested unit and then one RF_Raid_t per slot.
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Initialized with rf_mutex_init() in raidattach(). */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* Both queues start out empty (NULL); raidattach() resets them. */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

/* malloc(9) type used for this driver's allocations (e.g. raid_softc). */
MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
210
211 /* prototypes */
212 static void KernelWakeupFunc(struct buf *);
213 static void InitBP(struct buf *, struct vnode *, unsigned,
214 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
215 void *, int, struct proc *);
216 static void raidinit(RF_Raid_t *);
217
218 void raidattach(int);
219 static int raid_match(struct device *, struct cfdata *, void *);
220 static void raid_attach(struct device *, struct device *, void *);
221 static int raid_detach(struct device *, int);
222
223 dev_type_open(raidopen);
224 dev_type_close(raidclose);
225 dev_type_read(raidread);
226 dev_type_write(raidwrite);
227 dev_type_ioctl(raidioctl);
228 dev_type_strategy(raidstrategy);
229 dev_type_dump(raiddump);
230 dev_type_size(raidsize);
231
/*
 * Block device switch entry for raid(4): wires up the open, close,
 * strategy, ioctl, dump and size entry points defined in this file.
 */
const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

/*
 * Character (raw) device switch entry for raid(4).  Read and write go
 * through raidread()/raidwrite(); the stop, tty, poll, mmap and kqfilter
 * slots use the generic "no*" stubs.
 */
const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};
241
242 /* XXX Not sure if the following should be replacing the raidPtrs above,
243 or if it should be used in conjunction with that...
244 */
245
/*
 * Per-unit software state of a raid pseudo-device.  raidattach()
 * allocates and zeroes an array of these (one per unit); the entry
 * points index it with raidunit(dev).
 */
struct raid_softc {
	struct device *sc_dev;		/* autoconf device; handed to
					 * config_detach() on shutdown and
					 * used as booted_device for a root
					 * RAID set */
	int     sc_flags;		/* flags (RAIDF_*, see below) */
	int     sc_cflags;		/* configuration flags */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info
					 * (label, open masks, wedges) */
	struct bufq_state *buf_queue;	/* used for the device queue;
					 * raidstrategy() puts bufs here */
};
/* sc_flags: bits kept in raid_softc.sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Map a dev_t to the raid unit number (index into raid_softc/raidPtrs). */
#define	raidunit(x)	DISKUNIT(x)
/* Number of usable units; set by raidattach() and checked by every entry point. */
int numraid = 0;

/* autoconf(9) glue: cfdriver declared elsewhere, cfattach declared here. */
extern struct cfdriver raid_cd;
CFATTACH_DECL(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL);
268
269 /*
270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271 * Be aware that large numbers can allow the driver to consume a lot of
272 * kernel memory, especially on writes, and in degraded mode reads.
273 *
274 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275 * a single 64K write will typically require 64K for the old data,
276 * 64K for the old parity, and 64K for the new parity, for a total
277 * of 192K (if the parity buffer is not re-used immediately).
278 * Even if it is used immediately, that's still 128K, which when multiplied
279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
280 *
281 * Now in degraded mode, for example, a 64K read on the above setup may
282 * require data reconstruction, which will require *all* of the 4 remaining
283 * disks to participate -- 4 * 32K/disk == 128K again.
284 */
285
/* Default limit on simultaneous I/Os per RAID device; may be overridden at build time. */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/*
 * Build the dev_t for the raw partition (RAW_PART) of the same major
 * number and raid unit as `dev'.
 */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
292
293 /* declared here, and made public, for the benefit of KVM stuff.. */
294 struct raid_softc *raid_softc;
295
296 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
297 struct disklabel *);
298 static void raidgetdisklabel(dev_t);
299 static void raidmakedisklabel(struct raid_softc *);
300
301 static int raidlock(struct raid_softc *);
302 static void raidunlock(struct raid_softc *);
303
304 static void rf_markalldirty(RF_Raid_t *);
305 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
306
307 void rf_ReconThread(struct rf_recon_req *);
308 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
309 void rf_CopybackThread(RF_Raid_t *raidPtr);
310 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
311 int rf_autoconfig(struct device *self);
312 void rf_buildroothack(RF_ConfigSet_t *);
313
314 RF_AutoConfig_t *rf_find_raid_components(void);
315 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
316 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
317 static int rf_reasonable_label(RF_ComponentLabel_t *);
318 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
319 int rf_set_autoconfig(RF_Raid_t *, int);
320 int rf_set_rootpartition(RF_Raid_t *, int);
321 void rf_release_all_vps(RF_ConfigSet_t *);
322 void rf_cleanup_config_set(RF_ConfigSet_t *);
323 int rf_have_enough_components(RF_ConfigSet_t *);
324 int rf_auto_config_set(RF_ConfigSet_t *, int *);
325
/*
 * Autoconfiguration switch.  raidattach() sets it to 1 when the kernel
 * is built with RAID_AUTOCONFIG; rf_autoconfig() does nothing while it
 * is 0 and clears it again once it has run.
 */
static int raidautoconfig = 0; 	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file.  */

/* NOTE(review): presumably the driver-wide memory pools; not used in the visible part of this file. */
struct RF_Pools_s rf_pools;
333
334 void
335 raidattach(int num)
336 {
337 int raidID;
338 int i, rc;
339
340 #ifdef DEBUG
341 printf("raidattach: Asked for %d units\n", num);
342 #endif
343
344 if (num <= 0) {
345 #ifdef DIAGNOSTIC
346 panic("raidattach: count <= 0");
347 #endif
348 return;
349 }
350 /* This is where all the initialization stuff gets done. */
351
352 numraid = num;
353
354 /* Make some space for requested number of units... */
355
356 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
357 if (raidPtrs == NULL) {
358 panic("raidPtrs is NULL!!");
359 }
360
361 rf_mutex_init(&rf_sparet_wait_mutex);
362
363 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
364
365 for (i = 0; i < num; i++)
366 raidPtrs[i] = NULL;
367 rc = rf_BootRaidframe();
368 if (rc == 0)
369 printf("Kernelized RAIDframe activated\n");
370 else
371 panic("Serious error booting RAID!!");
372
373 /* put together some datastructures like the CCD device does.. This
374 * lets us lock the device and what-not when it gets opened. */
375
376 raid_softc = (struct raid_softc *)
377 malloc(num * sizeof(struct raid_softc),
378 M_RAIDFRAME, M_NOWAIT);
379 if (raid_softc == NULL) {
380 printf("WARNING: no memory for RAIDframe driver\n");
381 return;
382 }
383
384 memset(raid_softc, 0, num * sizeof(struct raid_softc));
385
386 for (raidID = 0; raidID < num; raidID++) {
387 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
388
389 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
390 (RF_Raid_t *));
391 if (raidPtrs[raidID] == NULL) {
392 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
393 numraid = raidID;
394 return;
395 }
396 }
397
398 if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
399 printf("config_cfattach_attach failed?\n");
400 }
401
402 #ifdef RAID_AUTOCONFIG
403 raidautoconfig = 1;
404 #endif
405
406 /*
407 * Register a finalizer which will be used to auto-config RAID
408 * sets once all real hardware devices have been found.
409 */
410 if (config_finalize_register(NULL, rf_autoconfig) != 0)
411 printf("WARNING: unable to register RAIDframe finalizer\n");
412 }
413
414 int
415 rf_autoconfig(struct device *self)
416 {
417 RF_AutoConfig_t *ac_list;
418 RF_ConfigSet_t *config_sets;
419
420 if (raidautoconfig == 0)
421 return (0);
422
423 /* XXX This code can only be run once. */
424 raidautoconfig = 0;
425
426 /* 1. locate all RAID components on the system */
427 #ifdef DEBUG
428 printf("Searching for RAID components...\n");
429 #endif
430 ac_list = rf_find_raid_components();
431
432 /* 2. Sort them into their respective sets. */
433 config_sets = rf_create_auto_sets(ac_list);
434
435 /*
436 * 3. Evaluate each set andconfigure the valid ones.
437 * This gets done in rf_buildroothack().
438 */
439 rf_buildroothack(config_sets);
440
441 return 1;
442 }
443
444 void
445 rf_buildroothack(RF_ConfigSet_t *config_sets)
446 {
447 RF_ConfigSet_t *cset;
448 RF_ConfigSet_t *next_cset;
449 int retcode;
450 int raidID;
451 int rootID;
452 int col;
453 int num_root;
454 char *devname;
455
456 rootID = 0;
457 num_root = 0;
458 cset = config_sets;
459 while(cset != NULL ) {
460 next_cset = cset->next;
461 if (rf_have_enough_components(cset) &&
462 cset->ac->clabel->autoconfigure==1) {
463 retcode = rf_auto_config_set(cset,&raidID);
464 if (!retcode) {
465 #ifdef DEBUG
466 printf("raid%d: configured ok\n", raidID);
467 #endif
468 if (cset->rootable) {
469 rootID = raidID;
470 num_root++;
471 }
472 } else {
473 /* The autoconfig didn't work :( */
474 #ifdef DEBUG
475 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
476 #endif
477 rf_release_all_vps(cset);
478 }
479 } else {
480 #ifdef DEBUG
481 printf("raid%d: not enough components\n", raidID);
482 #endif
483 /* we're not autoconfiguring this set...
484 release the associated resources */
485 rf_release_all_vps(cset);
486 }
487 /* cleanup */
488 rf_cleanup_config_set(cset);
489 cset = next_cset;
490 }
491
492 /* if the user has specified what the root device should be
493 then we don't touch booted_device or boothowto... */
494
495 if (rootspec != NULL)
496 return;
497
498 /* we found something bootable... */
499
500 if (num_root == 1) {
501 booted_device = raid_softc[rootID].sc_dev;
502 } else if (num_root > 1) {
503
504 /*
505 * Maybe the MD code can help. If it cannot, then
506 * setroot() will discover that we have no
507 * booted_device and will ask the user if nothing was
508 * hardwired in the kernel config file
509 */
510
511 if (booted_device == NULL)
512 cpu_rootconf();
513 if (booted_device == NULL)
514 return;
515
516 num_root = 0;
517 for (raidID = 0; raidID < numraid; raidID++) {
518 if (raidPtrs[raidID]->valid == 0)
519 continue;
520
521 if (raidPtrs[raidID]->root_partition == 0)
522 continue;
523
524 for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
525 devname = raidPtrs[raidID]->Disks[col].devname;
526 devname += sizeof("/dev/") - 1;
527 if (strncmp(devname, booted_device->dv_xname,
528 strlen(booted_device->dv_xname)) != 0)
529 continue;
530 #ifdef DEBUG
531 printf("raid%d includes boot device %s\n",
532 raidID, devname);
533 #endif
534 num_root++;
535 rootID = raidID;
536 }
537 }
538
539 if (num_root == 1) {
540 booted_device = raid_softc[rootID].sc_dev;
541 } else {
542 /* we can't guess.. require the user to answer... */
543 boothowto |= RB_ASKNAME;
544 }
545 }
546 }
547
548
549 int
550 raidsize(dev_t dev)
551 {
552 struct raid_softc *rs;
553 struct disklabel *lp;
554 int part, unit, omask, size;
555
556 unit = raidunit(dev);
557 if (unit >= numraid)
558 return (-1);
559 rs = &raid_softc[unit];
560
561 if ((rs->sc_flags & RAIDF_INITED) == 0)
562 return (-1);
563
564 part = DISKPART(dev);
565 omask = rs->sc_dkdev.dk_openmask & (1 << part);
566 lp = rs->sc_dkdev.dk_label;
567
568 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
569 return (-1);
570
571 if (lp->d_partitions[part].p_fstype != FS_SWAP)
572 size = -1;
573 else
574 size = lp->d_partitions[part].p_size *
575 (lp->d_secsize / DEV_BSIZE);
576
577 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
578 return (-1);
579
580 return (size);
581
582 }
583
584 int
585 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
586 {
587 int unit = raidunit(dev);
588 struct raid_softc *rs;
589 const struct bdevsw *bdev;
590 struct disklabel *lp;
591 RF_Raid_t *raidPtr;
592 daddr_t offset;
593 int part, c, sparecol, j, scol, dumpto;
594 int error = 0;
595
596 if (unit >= numraid)
597 return (ENXIO);
598
599 rs = &raid_softc[unit];
600 raidPtr = raidPtrs[unit];
601
602 if ((rs->sc_flags & RAIDF_INITED) == 0)
603 return ENXIO;
604
605 /* we only support dumping to RAID 1 sets */
606 if (raidPtr->Layout.numDataCol != 1 ||
607 raidPtr->Layout.numParityCol != 1)
608 return EINVAL;
609
610
611 if ((error = raidlock(rs)) != 0)
612 return error;
613
614 if (size % DEV_BSIZE != 0) {
615 error = EINVAL;
616 goto out;
617 }
618
619 if (blkno + size / DEV_BSIZE > rs->sc_size) {
620 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
621 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
622 size / DEV_BSIZE, rs->sc_size);
623 error = EINVAL;
624 goto out;
625 }
626
627 part = DISKPART(dev);
628 lp = rs->sc_dkdev.dk_label;
629 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
630
631 /* figure out what device is alive.. */
632
633 /*
634 Look for a component to dump to. The preference for the
635 component to dump to is as follows:
636 1) the master
637 2) a used_spare of the master
638 3) the slave
639 4) a used_spare of the slave
640 */
641
642 dumpto = -1;
643 for (c = 0; c < raidPtr->numCol; c++) {
644 if (raidPtr->Disks[c].status == rf_ds_optimal) {
645 /* this might be the one */
646 dumpto = c;
647 break;
648 }
649 }
650
651 /*
652 At this point we have possibly selected a live master or a
653 live slave. We now check to see if there is a spared
654 master (or a spared slave), if we didn't find a live master
655 or a live slave.
656 */
657
658 for (c = 0; c < raidPtr->numSpare; c++) {
659 sparecol = raidPtr->numCol + c;
660 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
661 /* How about this one? */
662 scol = -1;
663 for(j=0;j<raidPtr->numCol;j++) {
664 if (raidPtr->Disks[j].spareCol == sparecol) {
665 scol = j;
666 break;
667 }
668 }
669 if (scol == 0) {
670 /*
671 We must have found a spared master!
672 We'll take that over anything else
673 found so far. (We couldn't have
674 found a real master before, since
675 this is a used spare, and it's
676 saying that it's replacing the
677 master.) On reboot (with
678 autoconfiguration turned on)
679 sparecol will become the 1st
680 component (component0) of this set.
681 */
682 dumpto = sparecol;
683 break;
684 } else if (scol != -1) {
685 /*
686 Must be a spared slave. We'll dump
687 to that if we havn't found anything
688 else so far.
689 */
690 if (dumpto == -1)
691 dumpto = sparecol;
692 }
693 }
694 }
695
696 if (dumpto == -1) {
697 /* we couldn't find any live components to dump to!?!?
698 */
699 error = EINVAL;
700 goto out;
701 }
702
703 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
704
705 /*
706 Note that blkno is relative to this particular partition.
707 By adding the offset of this partition in the RAID
708 set, and also adding RF_PROTECTED_SECTORS, we get a
709 value that is relative to the partition used for the
710 underlying component.
711 */
712
713 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
714 blkno + offset, va, size);
715
716 out:
717 raidunlock(rs);
718
719 return error;
720 }
721 /* ARGSUSED */
722 int
723 raidopen(dev_t dev, int flags, int fmt,
724 struct lwp *l)
725 {
726 int unit = raidunit(dev);
727 struct raid_softc *rs;
728 struct disklabel *lp;
729 int part, pmask;
730 int error = 0;
731
732 if (unit >= numraid)
733 return (ENXIO);
734 rs = &raid_softc[unit];
735
736 if ((error = raidlock(rs)) != 0)
737 return (error);
738 lp = rs->sc_dkdev.dk_label;
739
740 part = DISKPART(dev);
741
742 /*
743 * If there are wedges, and this is not RAW_PART, then we
744 * need to fail.
745 */
746 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
747 error = EBUSY;
748 goto bad;
749 }
750 pmask = (1 << part);
751
752 if ((rs->sc_flags & RAIDF_INITED) &&
753 (rs->sc_dkdev.dk_openmask == 0))
754 raidgetdisklabel(dev);
755
756 /* make sure that this partition exists */
757
758 if (part != RAW_PART) {
759 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
760 ((part >= lp->d_npartitions) ||
761 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
762 error = ENXIO;
763 goto bad;
764 }
765 }
766 /* Prevent this unit from being unconfigured while open. */
767 switch (fmt) {
768 case S_IFCHR:
769 rs->sc_dkdev.dk_copenmask |= pmask;
770 break;
771
772 case S_IFBLK:
773 rs->sc_dkdev.dk_bopenmask |= pmask;
774 break;
775 }
776
777 if ((rs->sc_dkdev.dk_openmask == 0) &&
778 ((rs->sc_flags & RAIDF_INITED) != 0)) {
779 /* First one... mark things as dirty... Note that we *MUST*
780 have done a configure before this. I DO NOT WANT TO BE
781 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
782 THAT THEY BELONG TOGETHER!!!!! */
783 /* XXX should check to see if we're only open for reading
784 here... If so, we needn't do this, but then need some
785 other way of keeping track of what's happened.. */
786
787 rf_markalldirty( raidPtrs[unit] );
788 }
789
790
791 rs->sc_dkdev.dk_openmask =
792 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
793
794 bad:
795 raidunlock(rs);
796
797 return (error);
798
799
800 }
801 /* ARGSUSED */
802 int
803 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
804 {
805 int unit = raidunit(dev);
806 struct cfdata *cf;
807 struct raid_softc *rs;
808 int error = 0;
809 int part;
810
811 if (unit >= numraid)
812 return (ENXIO);
813 rs = &raid_softc[unit];
814
815 if ((error = raidlock(rs)) != 0)
816 return (error);
817
818 part = DISKPART(dev);
819
820 /* ...that much closer to allowing unconfiguration... */
821 switch (fmt) {
822 case S_IFCHR:
823 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
824 break;
825
826 case S_IFBLK:
827 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
828 break;
829 }
830 rs->sc_dkdev.dk_openmask =
831 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
832
833 if ((rs->sc_dkdev.dk_openmask == 0) &&
834 ((rs->sc_flags & RAIDF_INITED) != 0)) {
835 /* Last one... device is not unconfigured yet.
836 Device shutdown has taken care of setting the
837 clean bits if RAIDF_INITED is not set
838 mark things as clean... */
839
840 rf_update_component_labels(raidPtrs[unit],
841 RF_FINAL_COMPONENT_UPDATE);
842 if (doing_shutdown) {
843 /* last one, and we're going down, so
844 lights out for this RAID set too. */
845 error = rf_Shutdown(raidPtrs[unit]);
846
847 /* It's no longer initialized... */
848 rs->sc_flags &= ~RAIDF_INITED;
849
850 /* detach the device */
851
852 cf = device_cfdata(rs->sc_dev);
853 error = config_detach(rs->sc_dev, DETACH_QUIET);
854 free(cf, M_RAIDFRAME);
855
856 /* Detach the disk. */
857 disk_detach(&rs->sc_dkdev);
858 disk_destroy(&rs->sc_dkdev);
859 }
860 }
861
862 raidunlock(rs);
863 return (0);
864
865 }
866
867 void
868 raidstrategy(struct buf *bp)
869 {
870 int s;
871
872 unsigned int raidID = raidunit(bp->b_dev);
873 RF_Raid_t *raidPtr;
874 struct raid_softc *rs = &raid_softc[raidID];
875 int wlabel;
876
877 if ((rs->sc_flags & RAIDF_INITED) ==0) {
878 bp->b_error = ENXIO;
879 goto done;
880 }
881 if (raidID >= numraid || !raidPtrs[raidID]) {
882 bp->b_error = ENODEV;
883 goto done;
884 }
885 raidPtr = raidPtrs[raidID];
886 if (!raidPtr->valid) {
887 bp->b_error = ENODEV;
888 goto done;
889 }
890 if (bp->b_bcount == 0) {
891 db1_printf(("b_bcount is zero..\n"));
892 goto done;
893 }
894
895 /*
896 * Do bounds checking and adjust transfer. If there's an
897 * error, the bounds check will flag that for us.
898 */
899
900 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
901 if (DISKPART(bp->b_dev) == RAW_PART) {
902 uint64_t size; /* device size in DEV_BSIZE unit */
903
904 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
905 size = raidPtr->totalSectors <<
906 (raidPtr->logBytesPerSector - DEV_BSHIFT);
907 } else {
908 size = raidPtr->totalSectors >>
909 (DEV_BSHIFT - raidPtr->logBytesPerSector);
910 }
911 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
912 goto done;
913 }
914 } else {
915 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
916 db1_printf(("Bounds check failed!!:%d %d\n",
917 (int) bp->b_blkno, (int) wlabel));
918 goto done;
919 }
920 }
921 s = splbio();
922
923 bp->b_resid = 0;
924
925 /* stuff it onto our queue */
926 BUFQ_PUT(rs->buf_queue, bp);
927
928 /* scheduled the IO to happen at the next convenient time */
929 wakeup(&(raidPtrs[raidID]->iodone));
930
931 splx(s);
932 return;
933
934 done:
935 bp->b_resid = bp->b_bcount;
936 biodone(bp);
937 }
938 /* ARGSUSED */
939 int
940 raidread(dev_t dev, struct uio *uio, int flags)
941 {
942 int unit = raidunit(dev);
943 struct raid_softc *rs;
944
945 if (unit >= numraid)
946 return (ENXIO);
947 rs = &raid_softc[unit];
948
949 if ((rs->sc_flags & RAIDF_INITED) == 0)
950 return (ENXIO);
951
952 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
953
954 }
955 /* ARGSUSED */
956 int
957 raidwrite(dev_t dev, struct uio *uio, int flags)
958 {
959 int unit = raidunit(dev);
960 struct raid_softc *rs;
961
962 if (unit >= numraid)
963 return (ENXIO);
964 rs = &raid_softc[unit];
965
966 if ((rs->sc_flags & RAIDF_INITED) == 0)
967 return (ENXIO);
968
969 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
970
971 }
972
973 int
974 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
975 {
976 int unit = raidunit(dev);
977 int error = 0;
978 int part, pmask;
979 struct cfdata *cf;
980 struct raid_softc *rs;
981 RF_Config_t *k_cfg, *u_cfg;
982 RF_Raid_t *raidPtr;
983 RF_RaidDisk_t *diskPtr;
984 RF_AccTotals_t *totals;
985 RF_DeviceConfig_t *d_cfg, **ucfgp;
986 u_char *specific_buf;
987 int retcode = 0;
988 int column;
989 int raidid;
990 struct rf_recon_req *rrcopy, *rr;
991 RF_ComponentLabel_t *clabel;
992 RF_ComponentLabel_t *ci_label;
993 RF_ComponentLabel_t **clabel_ptr;
994 RF_SingleComponent_t *sparePtr,*componentPtr;
995 RF_SingleComponent_t component;
996 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
997 int i, j, d;
998 #ifdef __HAVE_OLD_DISKLABEL
999 struct disklabel newlabel;
1000 #endif
1001 struct dkwedge_info *dkw;
1002
1003 if (unit >= numraid)
1004 return (ENXIO);
1005 rs = &raid_softc[unit];
1006 raidPtr = raidPtrs[unit];
1007
1008 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
1009 (int) DISKPART(dev), (int) unit, (int) cmd));
1010
1011 /* Must be open for writes for these commands... */
1012 switch (cmd) {
1013 #ifdef DIOCGSECTORSIZE
1014 case DIOCGSECTORSIZE:
1015 *(u_int *)data = raidPtr->bytesPerSector;
1016 return 0;
1017 case DIOCGMEDIASIZE:
1018 *(off_t *)data =
1019 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1020 return 0;
1021 #endif
1022 case DIOCSDINFO:
1023 case DIOCWDINFO:
1024 #ifdef __HAVE_OLD_DISKLABEL
1025 case ODIOCWDINFO:
1026 case ODIOCSDINFO:
1027 #endif
1028 case DIOCWLABEL:
1029 case DIOCAWEDGE:
1030 case DIOCDWEDGE:
1031 if ((flag & FWRITE) == 0)
1032 return (EBADF);
1033 }
1034
1035 /* Must be initialized for these... */
1036 switch (cmd) {
1037 case DIOCGDINFO:
1038 case DIOCSDINFO:
1039 case DIOCWDINFO:
1040 #ifdef __HAVE_OLD_DISKLABEL
1041 case ODIOCGDINFO:
1042 case ODIOCWDINFO:
1043 case ODIOCSDINFO:
1044 case ODIOCGDEFLABEL:
1045 #endif
1046 case DIOCGPART:
1047 case DIOCWLABEL:
1048 case DIOCGDEFLABEL:
1049 case DIOCAWEDGE:
1050 case DIOCDWEDGE:
1051 case DIOCLWEDGES:
1052 case RAIDFRAME_SHUTDOWN:
1053 case RAIDFRAME_REWRITEPARITY:
1054 case RAIDFRAME_GET_INFO:
1055 case RAIDFRAME_RESET_ACCTOTALS:
1056 case RAIDFRAME_GET_ACCTOTALS:
1057 case RAIDFRAME_KEEP_ACCTOTALS:
1058 case RAIDFRAME_GET_SIZE:
1059 case RAIDFRAME_FAIL_DISK:
1060 case RAIDFRAME_COPYBACK:
1061 case RAIDFRAME_CHECK_RECON_STATUS:
1062 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1063 case RAIDFRAME_GET_COMPONENT_LABEL:
1064 case RAIDFRAME_SET_COMPONENT_LABEL:
1065 case RAIDFRAME_ADD_HOT_SPARE:
1066 case RAIDFRAME_REMOVE_HOT_SPARE:
1067 case RAIDFRAME_INIT_LABELS:
1068 case RAIDFRAME_REBUILD_IN_PLACE:
1069 case RAIDFRAME_CHECK_PARITY:
1070 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1071 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1072 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1073 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1074 case RAIDFRAME_SET_AUTOCONFIG:
1075 case RAIDFRAME_SET_ROOT:
1076 case RAIDFRAME_DELETE_COMPONENT:
1077 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1078 if ((rs->sc_flags & RAIDF_INITED) == 0)
1079 return (ENXIO);
1080 }
1081
1082 switch (cmd) {
1083
1084 /* configure the system */
1085 case RAIDFRAME_CONFIGURE:
1086
1087 if (raidPtr->valid) {
1088 /* There is a valid RAID set running on this unit! */
1089 printf("raid%d: Device already configured!\n",unit);
1090 return(EINVAL);
1091 }
1092
1093 /* copy-in the configuration information */
1094 /* data points to a pointer to the configuration structure */
1095
1096 u_cfg = *((RF_Config_t **) data);
1097 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1098 if (k_cfg == NULL) {
1099 return (ENOMEM);
1100 }
1101 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1102 if (retcode) {
1103 RF_Free(k_cfg, sizeof(RF_Config_t));
1104 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1105 retcode));
1106 return (retcode);
1107 }
1108 /* allocate a buffer for the layout-specific data, and copy it
1109 * in */
1110 if (k_cfg->layoutSpecificSize) {
1111 if (k_cfg->layoutSpecificSize > 10000) {
1112 /* sanity check */
1113 RF_Free(k_cfg, sizeof(RF_Config_t));
1114 return (EINVAL);
1115 }
1116 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1117 (u_char *));
1118 if (specific_buf == NULL) {
1119 RF_Free(k_cfg, sizeof(RF_Config_t));
1120 return (ENOMEM);
1121 }
1122 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1123 k_cfg->layoutSpecificSize);
1124 if (retcode) {
1125 RF_Free(k_cfg, sizeof(RF_Config_t));
1126 RF_Free(specific_buf,
1127 k_cfg->layoutSpecificSize);
1128 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1129 retcode));
1130 return (retcode);
1131 }
1132 } else
1133 specific_buf = NULL;
1134 k_cfg->layoutSpecific = specific_buf;
1135
1136 /* should do some kind of sanity check on the configuration.
1137 * Store the sum of all the bytes in the last byte? */
1138
1139 /* configure the system */
1140
1141 /*
1142 * Clear the entire RAID descriptor, just to make sure
1143 * there is no stale data left in the case of a
1144 * reconfiguration
1145 */
1146 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1147 raidPtr->raidid = unit;
1148
1149 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1150
1151 if (retcode == 0) {
1152
1153 /* allow this many simultaneous IO's to
1154 this RAID device */
1155 raidPtr->openings = RAIDOUTSTANDING;
1156
1157 raidinit(raidPtr);
1158 rf_markalldirty(raidPtr);
1159 }
1160 /* free the buffers. No return code here. */
1161 if (k_cfg->layoutSpecificSize) {
1162 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1163 }
1164 RF_Free(k_cfg, sizeof(RF_Config_t));
1165
1166 return (retcode);
1167
1168 /* shutdown the system */
1169 case RAIDFRAME_SHUTDOWN:
1170
1171 if ((error = raidlock(rs)) != 0)
1172 return (error);
1173
1174 /*
1175 * If somebody has a partition mounted, we shouldn't
1176 * shutdown.
1177 */
1178
1179 part = DISKPART(dev);
1180 pmask = (1 << part);
1181 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1182 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1183 (rs->sc_dkdev.dk_copenmask & pmask))) {
1184 raidunlock(rs);
1185 return (EBUSY);
1186 }
1187
1188 retcode = rf_Shutdown(raidPtr);
1189
1190 /* It's no longer initialized... */
1191 rs->sc_flags &= ~RAIDF_INITED;
1192
1193 /* free the pseudo device attach bits */
1194
1195 cf = device_cfdata(rs->sc_dev);
1196 /* XXX this causes us to not return any errors
1197 from the above call to rf_Shutdown() */
1198 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1199 free(cf, M_RAIDFRAME);
1200
1201 /* Detach the disk. */
1202 disk_detach(&rs->sc_dkdev);
1203 disk_destroy(&rs->sc_dkdev);
1204
1205 raidunlock(rs);
1206
1207 return (retcode);
1208 case RAIDFRAME_GET_COMPONENT_LABEL:
1209 clabel_ptr = (RF_ComponentLabel_t **) data;
1210 /* need to read the component label for the disk indicated
1211 by row,column in clabel */
1212
1213 /* For practice, let's get it directly fromdisk, rather
1214 than from the in-core copy */
1215 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1216 (RF_ComponentLabel_t *));
1217 if (clabel == NULL)
1218 return (ENOMEM);
1219
1220 retcode = copyin( *clabel_ptr, clabel,
1221 sizeof(RF_ComponentLabel_t));
1222
1223 if (retcode) {
1224 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1225 return(retcode);
1226 }
1227
1228 clabel->row = 0; /* Don't allow looking at anything else.*/
1229
1230 column = clabel->column;
1231
1232 if ((column < 0) || (column >= raidPtr->numCol +
1233 raidPtr->numSpare)) {
1234 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1235 return(EINVAL);
1236 }
1237
1238 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1239 raidPtr->raid_cinfo[column].ci_vp,
1240 clabel );
1241
1242 if (retcode == 0) {
1243 retcode = copyout(clabel, *clabel_ptr,
1244 sizeof(RF_ComponentLabel_t));
1245 }
1246 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1247 return (retcode);
1248
1249 case RAIDFRAME_SET_COMPONENT_LABEL:
1250 clabel = (RF_ComponentLabel_t *) data;
1251
1252 /* XXX check the label for valid stuff... */
1253 /* Note that some things *should not* get modified --
1254 the user should be re-initing the labels instead of
1255 trying to patch things.
1256 */
1257
1258 raidid = raidPtr->raidid;
1259 #ifdef DEBUG
1260 printf("raid%d: Got component label:\n", raidid);
1261 printf("raid%d: Version: %d\n", raidid, clabel->version);
1262 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1263 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1264 printf("raid%d: Column: %d\n", raidid, clabel->column);
1265 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1266 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1267 printf("raid%d: Status: %d\n", raidid, clabel->status);
1268 #endif
1269 clabel->row = 0;
1270 column = clabel->column;
1271
1272 if ((column < 0) || (column >= raidPtr->numCol)) {
1273 return(EINVAL);
1274 }
1275
1276 /* XXX this isn't allowed to do anything for now :-) */
1277
1278 /* XXX and before it is, we need to fill in the rest
1279 of the fields!?!?!?! */
1280 #if 0
1281 raidwrite_component_label(
1282 raidPtr->Disks[column].dev,
1283 raidPtr->raid_cinfo[column].ci_vp,
1284 clabel );
1285 #endif
1286 return (0);
1287
1288 case RAIDFRAME_INIT_LABELS:
1289 clabel = (RF_ComponentLabel_t *) data;
1290 /*
1291 we only want the serial number from
1292 the above. We get all the rest of the information
1293 from the config that was used to create this RAID
1294 set.
1295 */
1296
1297 raidPtr->serial_number = clabel->serial_number;
1298
1299 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1300 (RF_ComponentLabel_t *));
1301 if (ci_label == NULL)
1302 return (ENOMEM);
1303
1304 raid_init_component_label(raidPtr, ci_label);
1305 ci_label->serial_number = clabel->serial_number;
1306 ci_label->row = 0; /* we dont' pretend to support more */
1307
1308 for(column=0;column<raidPtr->numCol;column++) {
1309 diskPtr = &raidPtr->Disks[column];
1310 if (!RF_DEAD_DISK(diskPtr->status)) {
1311 ci_label->partitionSize = diskPtr->partitionSize;
1312 ci_label->column = column;
1313 raidwrite_component_label(
1314 raidPtr->Disks[column].dev,
1315 raidPtr->raid_cinfo[column].ci_vp,
1316 ci_label );
1317 }
1318 }
1319 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1320
1321 return (retcode);
1322 case RAIDFRAME_SET_AUTOCONFIG:
1323 d = rf_set_autoconfig(raidPtr, *(int *) data);
1324 printf("raid%d: New autoconfig value is: %d\n",
1325 raidPtr->raidid, d);
1326 *(int *) data = d;
1327 return (retcode);
1328
1329 case RAIDFRAME_SET_ROOT:
1330 d = rf_set_rootpartition(raidPtr, *(int *) data);
1331 printf("raid%d: New rootpartition value is: %d\n",
1332 raidPtr->raidid, d);
1333 *(int *) data = d;
1334 return (retcode);
1335
1336 /* initialize all parity */
1337 case RAIDFRAME_REWRITEPARITY:
1338
1339 if (raidPtr->Layout.map->faultsTolerated == 0) {
1340 /* Parity for RAID 0 is trivially correct */
1341 raidPtr->parity_good = RF_RAID_CLEAN;
1342 return(0);
1343 }
1344
1345 if (raidPtr->parity_rewrite_in_progress == 1) {
1346 /* Re-write is already in progress! */
1347 return(EINVAL);
1348 }
1349
1350 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1351 rf_RewriteParityThread,
1352 raidPtr,"raid_parity");
1353 return (retcode);
1354
1355
1356 case RAIDFRAME_ADD_HOT_SPARE:
1357 sparePtr = (RF_SingleComponent_t *) data;
1358 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1359 retcode = rf_add_hot_spare(raidPtr, &component);
1360 return(retcode);
1361
1362 case RAIDFRAME_REMOVE_HOT_SPARE:
1363 return(retcode);
1364
1365 case RAIDFRAME_DELETE_COMPONENT:
1366 componentPtr = (RF_SingleComponent_t *)data;
1367 memcpy( &component, componentPtr,
1368 sizeof(RF_SingleComponent_t));
1369 retcode = rf_delete_component(raidPtr, &component);
1370 return(retcode);
1371
1372 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1373 componentPtr = (RF_SingleComponent_t *)data;
1374 memcpy( &component, componentPtr,
1375 sizeof(RF_SingleComponent_t));
1376 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1377 return(retcode);
1378
1379 case RAIDFRAME_REBUILD_IN_PLACE:
1380
1381 if (raidPtr->Layout.map->faultsTolerated == 0) {
1382 /* Can't do this on a RAID 0!! */
1383 return(EINVAL);
1384 }
1385
1386 if (raidPtr->recon_in_progress == 1) {
1387 /* a reconstruct is already in progress! */
1388 return(EINVAL);
1389 }
1390
1391 componentPtr = (RF_SingleComponent_t *) data;
1392 memcpy( &component, componentPtr,
1393 sizeof(RF_SingleComponent_t));
1394 component.row = 0; /* we don't support any more */
1395 column = component.column;
1396
1397 if ((column < 0) || (column >= raidPtr->numCol)) {
1398 return(EINVAL);
1399 }
1400
1401 RF_LOCK_MUTEX(raidPtr->mutex);
1402 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1403 (raidPtr->numFailures > 0)) {
1404 /* XXX 0 above shouldn't be constant!!! */
1405 /* some component other than this has failed.
1406 Let's not make things worse than they already
1407 are... */
1408 printf("raid%d: Unable to reconstruct to disk at:\n",
1409 raidPtr->raidid);
1410 printf("raid%d: Col: %d Too many failures.\n",
1411 raidPtr->raidid, column);
1412 RF_UNLOCK_MUTEX(raidPtr->mutex);
1413 return (EINVAL);
1414 }
1415 if (raidPtr->Disks[column].status ==
1416 rf_ds_reconstructing) {
1417 printf("raid%d: Unable to reconstruct to disk at:\n",
1418 raidPtr->raidid);
1419 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1420
1421 RF_UNLOCK_MUTEX(raidPtr->mutex);
1422 return (EINVAL);
1423 }
1424 if (raidPtr->Disks[column].status == rf_ds_spared) {
1425 RF_UNLOCK_MUTEX(raidPtr->mutex);
1426 return (EINVAL);
1427 }
1428 RF_UNLOCK_MUTEX(raidPtr->mutex);
1429
1430 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1431 if (rrcopy == NULL)
1432 return(ENOMEM);
1433
1434 rrcopy->raidPtr = (void *) raidPtr;
1435 rrcopy->col = column;
1436
1437 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1438 rf_ReconstructInPlaceThread,
1439 rrcopy,"raid_reconip");
1440 return(retcode);
1441
1442 case RAIDFRAME_GET_INFO:
1443 if (!raidPtr->valid)
1444 return (ENODEV);
1445 ucfgp = (RF_DeviceConfig_t **) data;
1446 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1447 (RF_DeviceConfig_t *));
1448 if (d_cfg == NULL)
1449 return (ENOMEM);
1450 d_cfg->rows = 1; /* there is only 1 row now */
1451 d_cfg->cols = raidPtr->numCol;
1452 d_cfg->ndevs = raidPtr->numCol;
1453 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1454 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1455 return (ENOMEM);
1456 }
1457 d_cfg->nspares = raidPtr->numSpare;
1458 if (d_cfg->nspares >= RF_MAX_DISKS) {
1459 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1460 return (ENOMEM);
1461 }
1462 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1463 d = 0;
1464 for (j = 0; j < d_cfg->cols; j++) {
1465 d_cfg->devs[d] = raidPtr->Disks[j];
1466 d++;
1467 }
1468 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1469 d_cfg->spares[i] = raidPtr->Disks[j];
1470 }
1471 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1472 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1473
1474 return (retcode);
1475
1476 case RAIDFRAME_CHECK_PARITY:
1477 *(int *) data = raidPtr->parity_good;
1478 return (0);
1479
1480 case RAIDFRAME_RESET_ACCTOTALS:
1481 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1482 return (0);
1483
1484 case RAIDFRAME_GET_ACCTOTALS:
1485 totals = (RF_AccTotals_t *) data;
1486 *totals = raidPtr->acc_totals;
1487 return (0);
1488
1489 case RAIDFRAME_KEEP_ACCTOTALS:
1490 raidPtr->keep_acc_totals = *(int *)data;
1491 return (0);
1492
1493 case RAIDFRAME_GET_SIZE:
1494 *(int *) data = raidPtr->totalSectors;
1495 return (0);
1496
1497 /* fail a disk & optionally start reconstruction */
1498 case RAIDFRAME_FAIL_DISK:
1499
1500 if (raidPtr->Layout.map->faultsTolerated == 0) {
1501 /* Can't do this on a RAID 0!! */
1502 return(EINVAL);
1503 }
1504
1505 rr = (struct rf_recon_req *) data;
1506 rr->row = 0;
1507 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1508 return (EINVAL);
1509
1510
1511 RF_LOCK_MUTEX(raidPtr->mutex);
1512 if (raidPtr->status == rf_rs_reconstructing) {
1513 /* you can't fail a disk while we're reconstructing! */
1514 /* XXX wrong for RAID6 */
1515 RF_UNLOCK_MUTEX(raidPtr->mutex);
1516 return (EINVAL);
1517 }
1518 if ((raidPtr->Disks[rr->col].status ==
1519 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1520 /* some other component has failed. Let's not make
1521 things worse. XXX wrong for RAID6 */
1522 RF_UNLOCK_MUTEX(raidPtr->mutex);
1523 return (EINVAL);
1524 }
1525 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1526 /* Can't fail a spared disk! */
1527 RF_UNLOCK_MUTEX(raidPtr->mutex);
1528 return (EINVAL);
1529 }
1530 RF_UNLOCK_MUTEX(raidPtr->mutex);
1531
1532 /* make a copy of the recon request so that we don't rely on
1533 * the user's buffer */
1534 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1535 if (rrcopy == NULL)
1536 return(ENOMEM);
1537 memcpy(rrcopy, rr, sizeof(*rr));
1538 rrcopy->raidPtr = (void *) raidPtr;
1539
1540 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1541 rf_ReconThread,
1542 rrcopy,"raid_recon");
1543 return (0);
1544
1545 /* invoke a copyback operation after recon on whatever disk
1546 * needs it, if any */
1547 case RAIDFRAME_COPYBACK:
1548
1549 if (raidPtr->Layout.map->faultsTolerated == 0) {
1550 /* This makes no sense on a RAID 0!! */
1551 return(EINVAL);
1552 }
1553
1554 if (raidPtr->copyback_in_progress == 1) {
1555 /* Copyback is already in progress! */
1556 return(EINVAL);
1557 }
1558
1559 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1560 rf_CopybackThread,
1561 raidPtr,"raid_copyback");
1562 return (retcode);
1563
1564 /* return the percentage completion of reconstruction */
1565 case RAIDFRAME_CHECK_RECON_STATUS:
1566 if (raidPtr->Layout.map->faultsTolerated == 0) {
1567 /* This makes no sense on a RAID 0, so tell the
1568 user it's done. */
1569 *(int *) data = 100;
1570 return(0);
1571 }
1572 if (raidPtr->status != rf_rs_reconstructing)
1573 *(int *) data = 100;
1574 else {
1575 if (raidPtr->reconControl->numRUsTotal > 0) {
1576 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1577 } else {
1578 *(int *) data = 0;
1579 }
1580 }
1581 return (0);
1582 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1583 progressInfoPtr = (RF_ProgressInfo_t **) data;
1584 if (raidPtr->status != rf_rs_reconstructing) {
1585 progressInfo.remaining = 0;
1586 progressInfo.completed = 100;
1587 progressInfo.total = 100;
1588 } else {
1589 progressInfo.total =
1590 raidPtr->reconControl->numRUsTotal;
1591 progressInfo.completed =
1592 raidPtr->reconControl->numRUsComplete;
1593 progressInfo.remaining = progressInfo.total -
1594 progressInfo.completed;
1595 }
1596 retcode = copyout(&progressInfo, *progressInfoPtr,
1597 sizeof(RF_ProgressInfo_t));
1598 return (retcode);
1599
1600 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1601 if (raidPtr->Layout.map->faultsTolerated == 0) {
1602 /* This makes no sense on a RAID 0, so tell the
1603 user it's done. */
1604 *(int *) data = 100;
1605 return(0);
1606 }
1607 if (raidPtr->parity_rewrite_in_progress == 1) {
1608 *(int *) data = 100 *
1609 raidPtr->parity_rewrite_stripes_done /
1610 raidPtr->Layout.numStripe;
1611 } else {
1612 *(int *) data = 100;
1613 }
1614 return (0);
1615
1616 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1617 progressInfoPtr = (RF_ProgressInfo_t **) data;
1618 if (raidPtr->parity_rewrite_in_progress == 1) {
1619 progressInfo.total = raidPtr->Layout.numStripe;
1620 progressInfo.completed =
1621 raidPtr->parity_rewrite_stripes_done;
1622 progressInfo.remaining = progressInfo.total -
1623 progressInfo.completed;
1624 } else {
1625 progressInfo.remaining = 0;
1626 progressInfo.completed = 100;
1627 progressInfo.total = 100;
1628 }
1629 retcode = copyout(&progressInfo, *progressInfoPtr,
1630 sizeof(RF_ProgressInfo_t));
1631 return (retcode);
1632
1633 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1634 if (raidPtr->Layout.map->faultsTolerated == 0) {
1635 /* This makes no sense on a RAID 0 */
1636 *(int *) data = 100;
1637 return(0);
1638 }
1639 if (raidPtr->copyback_in_progress == 1) {
1640 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1641 raidPtr->Layout.numStripe;
1642 } else {
1643 *(int *) data = 100;
1644 }
1645 return (0);
1646
1647 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1648 progressInfoPtr = (RF_ProgressInfo_t **) data;
1649 if (raidPtr->copyback_in_progress == 1) {
1650 progressInfo.total = raidPtr->Layout.numStripe;
1651 progressInfo.completed =
1652 raidPtr->copyback_stripes_done;
1653 progressInfo.remaining = progressInfo.total -
1654 progressInfo.completed;
1655 } else {
1656 progressInfo.remaining = 0;
1657 progressInfo.completed = 100;
1658 progressInfo.total = 100;
1659 }
1660 retcode = copyout(&progressInfo, *progressInfoPtr,
1661 sizeof(RF_ProgressInfo_t));
1662 return (retcode);
1663
1664 /* the sparetable daemon calls this to wait for the kernel to
1665 * need a spare table. this ioctl does not return until a
1666 * spare table is needed. XXX -- calling mpsleep here in the
1667 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1668 * -- I should either compute the spare table in the kernel,
1669 * or have a different -- XXX XXX -- interface (a different
1670 * character device) for delivering the table -- XXX */
1671 #if 0
1672 case RAIDFRAME_SPARET_WAIT:
1673 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1674 while (!rf_sparet_wait_queue)
1675 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1676 waitreq = rf_sparet_wait_queue;
1677 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1678 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1679
1680 /* structure assignment */
1681 *((RF_SparetWait_t *) data) = *waitreq;
1682
1683 RF_Free(waitreq, sizeof(*waitreq));
1684 return (0);
1685
1686 /* wakes up a process waiting on SPARET_WAIT and puts an error
1687 * code in it that will cause the dameon to exit */
1688 case RAIDFRAME_ABORT_SPARET_WAIT:
1689 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1690 waitreq->fcol = -1;
1691 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1692 waitreq->next = rf_sparet_wait_queue;
1693 rf_sparet_wait_queue = waitreq;
1694 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1695 wakeup(&rf_sparet_wait_queue);
1696 return (0);
1697
1698 /* used by the spare table daemon to deliver a spare table
1699 * into the kernel */
1700 case RAIDFRAME_SEND_SPARET:
1701
1702 /* install the spare table */
1703 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1704
1705 /* respond to the requestor. the return status of the spare
1706 * table installation is passed in the "fcol" field */
1707 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1708 waitreq->fcol = retcode;
1709 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1710 waitreq->next = rf_sparet_resp_queue;
1711 rf_sparet_resp_queue = waitreq;
1712 wakeup(&rf_sparet_resp_queue);
1713 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1714
1715 return (retcode);
1716 #endif
1717
1718 default:
1719 break; /* fall through to the os-specific code below */
1720
1721 }
1722
1723 if (!raidPtr->valid)
1724 return (EINVAL);
1725
1726 /*
1727 * Add support for "regular" device ioctls here.
1728 */
1729
1730 switch (cmd) {
1731 case DIOCGDINFO:
1732 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1733 break;
1734 #ifdef __HAVE_OLD_DISKLABEL
1735 case ODIOCGDINFO:
1736 newlabel = *(rs->sc_dkdev.dk_label);
1737 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1738 return ENOTTY;
1739 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1740 break;
1741 #endif
1742
1743 case DIOCGPART:
1744 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1745 ((struct partinfo *) data)->part =
1746 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1747 break;
1748
1749 case DIOCWDINFO:
1750 case DIOCSDINFO:
1751 #ifdef __HAVE_OLD_DISKLABEL
1752 case ODIOCWDINFO:
1753 case ODIOCSDINFO:
1754 #endif
1755 {
1756 struct disklabel *lp;
1757 #ifdef __HAVE_OLD_DISKLABEL
1758 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1759 memset(&newlabel, 0, sizeof newlabel);
1760 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1761 lp = &newlabel;
1762 } else
1763 #endif
1764 lp = (struct disklabel *)data;
1765
1766 if ((error = raidlock(rs)) != 0)
1767 return (error);
1768
1769 rs->sc_flags |= RAIDF_LABELLING;
1770
1771 error = setdisklabel(rs->sc_dkdev.dk_label,
1772 lp, 0, rs->sc_dkdev.dk_cpulabel);
1773 if (error == 0) {
1774 if (cmd == DIOCWDINFO
1775 #ifdef __HAVE_OLD_DISKLABEL
1776 || cmd == ODIOCWDINFO
1777 #endif
1778 )
1779 error = writedisklabel(RAIDLABELDEV(dev),
1780 raidstrategy, rs->sc_dkdev.dk_label,
1781 rs->sc_dkdev.dk_cpulabel);
1782 }
1783 rs->sc_flags &= ~RAIDF_LABELLING;
1784
1785 raidunlock(rs);
1786
1787 if (error)
1788 return (error);
1789 break;
1790 }
1791
1792 case DIOCWLABEL:
1793 if (*(int *) data != 0)
1794 rs->sc_flags |= RAIDF_WLABEL;
1795 else
1796 rs->sc_flags &= ~RAIDF_WLABEL;
1797 break;
1798
1799 case DIOCGDEFLABEL:
1800 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1801 break;
1802
1803 #ifdef __HAVE_OLD_DISKLABEL
1804 case ODIOCGDEFLABEL:
1805 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1806 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1807 return ENOTTY;
1808 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1809 break;
1810 #endif
1811
1812 case DIOCAWEDGE:
1813 case DIOCDWEDGE:
1814 dkw = (void *)data;
1815
1816 /* If the ioctl happens here, the parent is us. */
1817 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1818 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1819
1820 case DIOCLWEDGES:
1821 return dkwedge_list(&rs->sc_dkdev,
1822 (struct dkwedge_list *)data, l);
1823
1824 default:
1825 retcode = ENOTTY;
1826 }
1827 return (retcode);
1828
1829 }
1830
1831
1832 /* raidinit -- complete the rest of the initialization for the
1833 RAIDframe device. */
1834
1835
1836 static void
1837 raidinit(RF_Raid_t *raidPtr)
1838 {
1839 struct cfdata *cf;
1840 struct raid_softc *rs;
1841 int unit;
1842
1843 unit = raidPtr->raidid;
1844
1845 rs = &raid_softc[unit];
1846
1847 /* XXX should check return code first... */
1848 rs->sc_flags |= RAIDF_INITED;
1849
1850 /* XXX doesn't check bounds. */
1851 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1852
1853 /* attach the pseudo device */
1854 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1855 cf->cf_name = raid_cd.cd_name;
1856 cf->cf_atname = raid_cd.cd_name;
1857 cf->cf_unit = unit;
1858 cf->cf_fstate = FSTATE_STAR;
1859
1860 rs->sc_dev = config_attach_pseudo(cf);
1861
1862 if (rs->sc_dev==NULL) {
1863 printf("raid%d: config_attach_pseudo failed\n",
1864 raidPtr->raidid);
1865 }
1866
1867 /* disk_attach actually creates space for the CPU disklabel, among
1868 * other things, so it's critical to call this *BEFORE* we try putzing
1869 * with disklabels. */
1870
1871 disk_init(&rs->sc_dkdev, rs->sc_xname, NULL);
1872 disk_attach(&rs->sc_dkdev);
1873
1874 /* XXX There may be a weird interaction here between this, and
1875 * protectedSectors, as used in RAIDframe. */
1876
1877 rs->sc_size = raidPtr->totalSectors;
1878
1879 dkwedge_discover(&rs->sc_dkdev);
1880
1881 rf_set_properties(rs, raidPtr);
1882
1883 }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * rf_GetSpareTableFromDaemon -- wake up the daemon and tell it to get
 * us a spare table.
 *
 * The request is pushed onto the head of rf_sparet_wait_queue and any
 * sleeper on that queue is woken.  The caller then sleeps until
 * rf_sparet_resp_queue is non-empty, removes the head entry of that
 * queue, and returns the entry's fcol field after freeing the entry.
 *
 * XXX
 * The entries in the queues should be tagged with the raidPtr so that,
 * in the extremely rare case that two recons happen at once, we know
 * for which device we're requesting a spare table.
 * XXX
 *
 * XXX This code is not currently used.  GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	RF_SparetWait_t *resp;
	int status;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);

	/* Post the request and poke whoever is waiting for one. */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * Wait for a response to be queued.
	 * NOTE(review): this used to say "mpsleep unlocks the mutex", but
	 * the call below is tsleep() and is not passed the mutex -- confirm
	 * whether the mutex is really released while sleeping.
	 */
	while (rf_sparet_resp_queue == NULL) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* Unlink the first response. */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	status = resp->fcol;
	/* this is not the same entry as the request we queued above */
	RF_Free(resp, sizeof(*resp));
	return (status);
}
#endif
1919
1920 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1921 * bp & passes it down.
1922 * any calls originating in the kernel must use non-blocking I/O
1923 * do some extra sanity checking to return "appropriate" error values for
1924 * certain conditions (to make some standard utilities work)
1925 *
1926 * Formerly known as: rf_DoAccessKernel
1927 */
1928 void
1929 raidstart(RF_Raid_t *raidPtr)
1930 {
1931 RF_SectorCount_t num_blocks, pb, sum;
1932 RF_RaidAddr_t raid_addr;
1933 struct partition *pp;
1934 daddr_t blocknum;
1935 int unit;
1936 struct raid_softc *rs;
1937 int do_async;
1938 struct buf *bp;
1939 int rc;
1940
1941 unit = raidPtr->raidid;
1942 rs = &raid_softc[unit];
1943
1944 /* quick check to see if anything has died recently */
1945 RF_LOCK_MUTEX(raidPtr->mutex);
1946 if (raidPtr->numNewFailures > 0) {
1947 RF_UNLOCK_MUTEX(raidPtr->mutex);
1948 rf_update_component_labels(raidPtr,
1949 RF_NORMAL_COMPONENT_UPDATE);
1950 RF_LOCK_MUTEX(raidPtr->mutex);
1951 raidPtr->numNewFailures--;
1952 }
1953
1954 /* Check to see if we're at the limit... */
1955 while (raidPtr->openings > 0) {
1956 RF_UNLOCK_MUTEX(raidPtr->mutex);
1957
1958 /* get the next item, if any, from the queue */
1959 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
1960 /* nothing more to do */
1961 return;
1962 }
1963
1964 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1965 * partition.. Need to make it absolute to the underlying
1966 * device.. */
1967
1968 blocknum = bp->b_blkno;
1969 if (DISKPART(bp->b_dev) != RAW_PART) {
1970 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1971 blocknum += pp->p_offset;
1972 }
1973
1974 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1975 (int) blocknum));
1976
1977 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1978 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1979
1980 /* *THIS* is where we adjust what block we're going to...
1981 * but DO NOT TOUCH bp->b_blkno!!! */
1982 raid_addr = blocknum;
1983
1984 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1985 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1986 sum = raid_addr + num_blocks + pb;
1987 if (1 || rf_debugKernelAccess) {
1988 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1989 (int) raid_addr, (int) sum, (int) num_blocks,
1990 (int) pb, (int) bp->b_resid));
1991 }
1992 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1993 || (sum < num_blocks) || (sum < pb)) {
1994 bp->b_error = ENOSPC;
1995 bp->b_resid = bp->b_bcount;
1996 biodone(bp);
1997 RF_LOCK_MUTEX(raidPtr->mutex);
1998 continue;
1999 }
2000 /*
2001 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2002 */
2003
2004 if (bp->b_bcount & raidPtr->sectorMask) {
2005 bp->b_error = EINVAL;
2006 bp->b_resid = bp->b_bcount;
2007 biodone(bp);
2008 RF_LOCK_MUTEX(raidPtr->mutex);
2009 continue;
2010
2011 }
2012 db1_printf(("Calling DoAccess..\n"));
2013
2014
2015 RF_LOCK_MUTEX(raidPtr->mutex);
2016 raidPtr->openings--;
2017 RF_UNLOCK_MUTEX(raidPtr->mutex);
2018
2019 /*
2020 * Everything is async.
2021 */
2022 do_async = 1;
2023
2024 disk_busy(&rs->sc_dkdev);
2025
2026 /* XXX we're still at splbio() here... do we *really*
2027 need to be? */
2028
2029 /* don't ever condition on bp->b_flags & B_WRITE.
2030 * always condition on B_READ instead */
2031
2032 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2033 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2034 do_async, raid_addr, num_blocks,
2035 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2036
2037 if (rc) {
2038 bp->b_error = rc;
2039 bp->b_resid = bp->b_bcount;
2040 biodone(bp);
2041 /* continue loop */
2042 }
2043
2044 RF_LOCK_MUTEX(raidPtr->mutex);
2045 }
2046 RF_UNLOCK_MUTEX(raidPtr->mutex);
2047 }
2048
2049
2050
2051
2052 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2053
2054 int
2055 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2056 {
2057 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2058 struct buf *bp;
2059
2060 req->queue = queue;
2061
2062 #if DIAGNOSTIC
2063 if (queue->raidPtr->raidid >= numraid) {
2064 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
2065 numraid);
2066 panic("Invalid Unit number in rf_DispatchKernelIO");
2067 }
2068 #endif
2069
2070 bp = req->bp;
2071
2072 switch (req->type) {
2073 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2074 /* XXX need to do something extra here.. */
2075 /* I'm leaving this in, as I've never actually seen it used,
2076 * and I'd like folks to report it... GO */
2077 printf(("WAKEUP CALLED\n"));
2078 queue->numOutstanding++;
2079
2080 bp->b_flags = 0;
2081 bp->b_private = req;
2082
2083 KernelWakeupFunc(bp);
2084 break;
2085
2086 case RF_IO_TYPE_READ:
2087 case RF_IO_TYPE_WRITE:
2088 #if RF_ACC_TRACE > 0
2089 if (req->tracerec) {
2090 RF_ETIMER_START(req->tracerec->timer);
2091 }
2092 #endif
2093 InitBP(bp, queue->rf_cinfo->ci_vp,
2094 op, queue->rf_cinfo->ci_dev,
2095 req->sectorOffset, req->numSector,
2096 req->buf, KernelWakeupFunc, (void *) req,
2097 queue->raidPtr->logBytesPerSector, req->b_proc);
2098
2099 if (rf_debugKernelAccess) {
2100 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2101 (long) bp->b_blkno));
2102 }
2103 queue->numOutstanding++;
2104 queue->last_deq_sector = req->sectorOffset;
2105 /* acc wouldn't have been let in if there were any pending
2106 * reqs at any other priority */
2107 queue->curPriority = req->priority;
2108
2109 db1_printf(("Going for %c to unit %d col %d\n",
2110 req->type, queue->raidPtr->raidid,
2111 queue->col));
2112 db1_printf(("sector %d count %d (%d bytes) %d\n",
2113 (int) req->sectorOffset, (int) req->numSector,
2114 (int) (req->numSector <<
2115 queue->raidPtr->logBytesPerSector),
2116 (int) queue->raidPtr->logBytesPerSector));
2117 VOP_STRATEGY(bp->b_vp, bp);
2118
2119 break;
2120
2121 default:
2122 panic("bad req->type in rf_DispatchKernelIO");
2123 }
2124 db1_printf(("Exiting from DispatchKernelIO\n"));
2125
2126 return (0);
2127 }
2128 /* this is the callback function associated with a I/O invoked from
2129 kernel code.
2130 */
2131 static void
2132 KernelWakeupFunc(struct buf *bp)
2133 {
2134 RF_DiskQueueData_t *req = NULL;
2135 RF_DiskQueue_t *queue;
2136 int s;
2137
2138 s = splbio();
2139 db1_printf(("recovering the request queue:\n"));
2140 req = bp->b_private;
2141
2142 queue = (RF_DiskQueue_t *) req->queue;
2143
2144 #if RF_ACC_TRACE > 0
2145 if (req->tracerec) {
2146 RF_ETIMER_STOP(req->tracerec->timer);
2147 RF_ETIMER_EVAL(req->tracerec->timer);
2148 RF_LOCK_MUTEX(rf_tracing_mutex);
2149 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2150 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2151 req->tracerec->num_phys_ios++;
2152 RF_UNLOCK_MUTEX(rf_tracing_mutex);
2153 }
2154 #endif
2155
2156 /* XXX Ok, let's get aggressive... If b_error is set, let's go
2157 * ballistic, and mark the component as hosed... */
2158
2159 if (bp->b_error != 0) {
2160 /* Mark the disk as dead */
2161 /* but only mark it once... */
2162 /* and only if it wouldn't leave this RAID set
2163 completely broken */
2164 if (((queue->raidPtr->Disks[queue->col].status ==
2165 rf_ds_optimal) ||
2166 (queue->raidPtr->Disks[queue->col].status ==
2167 rf_ds_used_spare)) &&
2168 (queue->raidPtr->numFailures <
2169 queue->raidPtr->Layout.map->faultsTolerated)) {
2170 printf("raid%d: IO Error. Marking %s as failed.\n",
2171 queue->raidPtr->raidid,
2172 queue->raidPtr->Disks[queue->col].devname);
2173 queue->raidPtr->Disks[queue->col].status =
2174 rf_ds_failed;
2175 queue->raidPtr->status = rf_rs_degraded;
2176 queue->raidPtr->numFailures++;
2177 queue->raidPtr->numNewFailures++;
2178 } else { /* Disk is already dead... */
2179 /* printf("Disk already marked as dead!\n"); */
2180 }
2181
2182 }
2183
2184 /* Fill in the error value */
2185
2186 req->error = bp->b_error;
2187
2188 simple_lock(&queue->raidPtr->iodone_lock);
2189
2190 /* Drop this one on the "finished" queue... */
2191 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2192
2193 /* Let the raidio thread know there is work to be done. */
2194 wakeup(&(queue->raidPtr->iodone));
2195
2196 simple_unlock(&queue->raidPtr->iodone_lock);
2197
2198 splx(s);
2199 }
2200
2201
2202
2203 /*
2204 * initialize a buf structure for doing an I/O in the kernel.
2205 */
2206 static void
2207 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2208 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2209 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2210 struct proc *b_proc)
2211 {
2212 /* bp->b_flags = B_PHYS | rw_flag; */
2213 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2214 bp->b_bcount = numSect << logBytesPerSector;
2215 bp->b_bufsize = bp->b_bcount;
2216 bp->b_error = 0;
2217 bp->b_dev = dev;
2218 bp->b_data = bf;
2219 bp->b_blkno = startSect;
2220 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2221 if (bp->b_bcount == 0) {
2222 panic("bp->b_bcount is zero in InitBP!!");
2223 }
2224 bp->b_proc = b_proc;
2225 bp->b_iodone = cbFunc;
2226 bp->b_private = cbArg;
2227 bp->b_vp = b_vp;
2228 if ((bp->b_flags & B_READ) == 0) {
2229 bp->b_vp->v_numoutput++;
2230 }
2231
2232 }
2233
2234 static void
2235 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2236 struct disklabel *lp)
2237 {
2238 memset(lp, 0, sizeof(*lp));
2239
2240 /* fabricate a label... */
2241 lp->d_secperunit = raidPtr->totalSectors;
2242 lp->d_secsize = raidPtr->bytesPerSector;
2243 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2244 lp->d_ntracks = 4 * raidPtr->numCol;
2245 lp->d_ncylinders = raidPtr->totalSectors /
2246 (lp->d_nsectors * lp->d_ntracks);
2247 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2248
2249 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2250 lp->d_type = DTYPE_RAID;
2251 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2252 lp->d_rpm = 3600;
2253 lp->d_interleave = 1;
2254 lp->d_flags = 0;
2255
2256 lp->d_partitions[RAW_PART].p_offset = 0;
2257 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2258 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2259 lp->d_npartitions = RAW_PART + 1;
2260
2261 lp->d_magic = DISKMAGIC;
2262 lp->d_magic2 = DISKMAGIC;
2263 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2264
2265 }
2266 /*
2267 * Read the disklabel from the raid device. If one is not present, fake one
2268 * up.
2269 */
2270 static void
2271 raidgetdisklabel(dev_t dev)
2272 {
2273 int unit = raidunit(dev);
2274 struct raid_softc *rs = &raid_softc[unit];
2275 const char *errstring;
2276 struct disklabel *lp = rs->sc_dkdev.dk_label;
2277 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2278 RF_Raid_t *raidPtr;
2279
2280 db1_printf(("Getting the disklabel...\n"));
2281
2282 memset(clp, 0, sizeof(*clp));
2283
2284 raidPtr = raidPtrs[unit];
2285
2286 raidgetdefaultlabel(raidPtr, rs, lp);
2287
2288 /*
2289 * Call the generic disklabel extraction routine.
2290 */
2291 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2292 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2293 if (errstring)
2294 raidmakedisklabel(rs);
2295 else {
2296 int i;
2297 struct partition *pp;
2298
2299 /*
2300 * Sanity check whether the found disklabel is valid.
2301 *
2302 * This is necessary since total size of the raid device
2303 * may vary when an interleave is changed even though exactly
2304 * same components are used, and old disklabel may used
2305 * if that is found.
2306 */
2307 if (lp->d_secperunit != rs->sc_size)
2308 printf("raid%d: WARNING: %s: "
2309 "total sector size in disklabel (%d) != "
2310 "the size of raid (%ld)\n", unit, rs->sc_xname,
2311 lp->d_secperunit, (long) rs->sc_size);
2312 for (i = 0; i < lp->d_npartitions; i++) {
2313 pp = &lp->d_partitions[i];
2314 if (pp->p_offset + pp->p_size > rs->sc_size)
2315 printf("raid%d: WARNING: %s: end of partition `%c' "
2316 "exceeds the size of raid (%ld)\n",
2317 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2318 }
2319 }
2320
2321 }
2322 /*
2323 * Take care of things one might want to take care of in the event
2324 * that a disklabel isn't present.
2325 */
2326 static void
2327 raidmakedisklabel(struct raid_softc *rs)
2328 {
2329 struct disklabel *lp = rs->sc_dkdev.dk_label;
2330 db1_printf(("Making a label..\n"));
2331
2332 /*
2333 * For historical reasons, if there's no disklabel present
2334 * the raw partition must be marked FS_BSDFFS.
2335 */
2336
2337 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2338
2339 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2340
2341 lp->d_checksum = dkcksum(lp);
2342 }
2343 /*
2344 * Wait interruptibly for an exclusive lock.
2345 *
2346 * XXX
2347 * Several drivers do this; it should be abstracted and made MP-safe.
2348 * (Hmm... where have we seen this warning before :-> GO )
2349 */
2350 static int
2351 raidlock(struct raid_softc *rs)
2352 {
2353 int error;
2354
2355 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2356 rs->sc_flags |= RAIDF_WANTED;
2357 if ((error =
2358 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2359 return (error);
2360 }
2361 rs->sc_flags |= RAIDF_LOCKED;
2362 return (0);
2363 }
2364 /*
2365 * Unlock and wake up any waiters.
2366 */
2367 static void
2368 raidunlock(struct raid_softc *rs)
2369 {
2370
2371 rs->sc_flags &= ~RAIDF_LOCKED;
2372 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2373 rs->sc_flags &= ~RAIDF_WANTED;
2374 wakeup(rs);
2375 }
2376 }
2377
2378
/*
 * Byte offset and byte size of the component label area that
 * raidread_component_label() and raidwrite_component_label() transfer.
 */
#define RF_COMPONENT_INFO_OFFSET	16384
#define RF_COMPONENT_INFO_SIZE		1024
2381
2382 int
2383 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2384 {
2385 RF_ComponentLabel_t clabel;
2386 raidread_component_label(dev, b_vp, &clabel);
2387 clabel.mod_counter = mod_counter;
2388 clabel.clean = RF_RAID_CLEAN;
2389 raidwrite_component_label(dev, b_vp, &clabel);
2390 return(0);
2391 }
2392
2393
2394 int
2395 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2396 {
2397 RF_ComponentLabel_t clabel;
2398 raidread_component_label(dev, b_vp, &clabel);
2399 clabel.mod_counter = mod_counter;
2400 clabel.clean = RF_RAID_DIRTY;
2401 raidwrite_component_label(dev, b_vp, &clabel);
2402 return(0);
2403 }
2404
2405 /* ARGSUSED */
2406 int
2407 raidread_component_label(dev_t dev, struct vnode *b_vp,
2408 RF_ComponentLabel_t *clabel)
2409 {
2410 struct buf *bp;
2411 const struct bdevsw *bdev;
2412 int error;
2413
2414 /* XXX should probably ensure that we don't try to do this if
2415 someone has changed rf_protected_sectors. */
2416
2417 if (b_vp == NULL) {
2418 /* For whatever reason, this component is not valid.
2419 Don't try to read a component label from it. */
2420 return(EINVAL);
2421 }
2422
2423 /* get a block of the appropriate size... */
2424 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2425 bp->b_dev = dev;
2426
2427 /* get our ducks in a row for the read */
2428 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2429 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2430 bp->b_flags |= B_READ;
2431 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2432
2433 bdev = bdevsw_lookup(bp->b_dev);
2434 if (bdev == NULL)
2435 return (ENXIO);
2436 (*bdev->d_strategy)(bp);
2437
2438 error = biowait(bp);
2439
2440 if (!error) {
2441 memcpy(clabel, bp->b_data,
2442 sizeof(RF_ComponentLabel_t));
2443 }
2444
2445 brelse(bp, 0);
2446 return(error);
2447 }
2448 /* ARGSUSED */
2449 int
2450 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2451 RF_ComponentLabel_t *clabel)
2452 {
2453 struct buf *bp;
2454 const struct bdevsw *bdev;
2455 int error;
2456
2457 /* get a block of the appropriate size... */
2458 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2459 bp->b_dev = dev;
2460
2461 /* get our ducks in a row for the write */
2462 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2463 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2464 bp->b_flags |= B_WRITE;
2465 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2466
2467 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2468
2469 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2470
2471 bdev = bdevsw_lookup(bp->b_dev);
2472 if (bdev == NULL)
2473 return (ENXIO);
2474 (*bdev->d_strategy)(bp);
2475 error = biowait(bp);
2476 brelse(bp, 0);
2477 if (error) {
2478 #if 1
2479 printf("Failed to write RAID component info!\n");
2480 #endif
2481 }
2482
2483 return(error);
2484 }
2485
2486 void
2487 rf_markalldirty(RF_Raid_t *raidPtr)
2488 {
2489 RF_ComponentLabel_t clabel;
2490 int sparecol;
2491 int c;
2492 int j;
2493 int scol = -1;
2494
2495 raidPtr->mod_counter++;
2496 for (c = 0; c < raidPtr->numCol; c++) {
2497 /* we don't want to touch (at all) a disk that has
2498 failed */
2499 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2500 raidread_component_label(
2501 raidPtr->Disks[c].dev,
2502 raidPtr->raid_cinfo[c].ci_vp,
2503 &clabel);
2504 if (clabel.status == rf_ds_spared) {
2505 /* XXX do something special...
2506 but whatever you do, don't
2507 try to access it!! */
2508 } else {
2509 raidmarkdirty(
2510 raidPtr->Disks[c].dev,
2511 raidPtr->raid_cinfo[c].ci_vp,
2512 raidPtr->mod_counter);
2513 }
2514 }
2515 }
2516
2517 for( c = 0; c < raidPtr->numSpare ; c++) {
2518 sparecol = raidPtr->numCol + c;
2519 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2520 /*
2521
2522 we claim this disk is "optimal" if it's
2523 rf_ds_used_spare, as that means it should be
2524 directly substitutable for the disk it replaced.
2525 We note that too...
2526
2527 */
2528
2529 for(j=0;j<raidPtr->numCol;j++) {
2530 if (raidPtr->Disks[j].spareCol == sparecol) {
2531 scol = j;
2532 break;
2533 }
2534 }
2535
2536 raidread_component_label(
2537 raidPtr->Disks[sparecol].dev,
2538 raidPtr->raid_cinfo[sparecol].ci_vp,
2539 &clabel);
2540 /* make sure status is noted */
2541
2542 raid_init_component_label(raidPtr, &clabel);
2543
2544 clabel.row = 0;
2545 clabel.column = scol;
2546 /* Note: we *don't* change status from rf_ds_used_spare
2547 to rf_ds_optimal */
2548 /* clabel.status = rf_ds_optimal; */
2549
2550 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2551 raidPtr->raid_cinfo[sparecol].ci_vp,
2552 raidPtr->mod_counter);
2553 }
2554 }
2555 }
2556
2557
2558 void
2559 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2560 {
2561 RF_ComponentLabel_t clabel;
2562 int sparecol;
2563 int c;
2564 int j;
2565 int scol;
2566
2567 scol = -1;
2568
2569 /* XXX should do extra checks to make sure things really are clean,
2570 rather than blindly setting the clean bit... */
2571
2572 raidPtr->mod_counter++;
2573
2574 for (c = 0; c < raidPtr->numCol; c++) {
2575 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2576 raidread_component_label(
2577 raidPtr->Disks[c].dev,
2578 raidPtr->raid_cinfo[c].ci_vp,
2579 &clabel);
2580 /* make sure status is noted */
2581 clabel.status = rf_ds_optimal;
2582
2583 /* bump the counter */
2584 clabel.mod_counter = raidPtr->mod_counter;
2585
2586 /* note what unit we are configured as */
2587 clabel.last_unit = raidPtr->raidid;
2588
2589 raidwrite_component_label(
2590 raidPtr->Disks[c].dev,
2591 raidPtr->raid_cinfo[c].ci_vp,
2592 &clabel);
2593 if (final == RF_FINAL_COMPONENT_UPDATE) {
2594 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2595 raidmarkclean(
2596 raidPtr->Disks[c].dev,
2597 raidPtr->raid_cinfo[c].ci_vp,
2598 raidPtr->mod_counter);
2599 }
2600 }
2601 }
2602 /* else we don't touch it.. */
2603 }
2604
2605 for( c = 0; c < raidPtr->numSpare ; c++) {
2606 sparecol = raidPtr->numCol + c;
2607 /* Need to ensure that the reconstruct actually completed! */
2608 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2609 /*
2610
2611 we claim this disk is "optimal" if it's
2612 rf_ds_used_spare, as that means it should be
2613 directly substitutable for the disk it replaced.
2614 We note that too...
2615
2616 */
2617
2618 for(j=0;j<raidPtr->numCol;j++) {
2619 if (raidPtr->Disks[j].spareCol == sparecol) {
2620 scol = j;
2621 break;
2622 }
2623 }
2624
2625 /* XXX shouldn't *really* need this... */
2626 raidread_component_label(
2627 raidPtr->Disks[sparecol].dev,
2628 raidPtr->raid_cinfo[sparecol].ci_vp,
2629 &clabel);
2630 /* make sure status is noted */
2631
2632 raid_init_component_label(raidPtr, &clabel);
2633
2634 clabel.mod_counter = raidPtr->mod_counter;
2635 clabel.column = scol;
2636 clabel.status = rf_ds_optimal;
2637 clabel.last_unit = raidPtr->raidid;
2638
2639 raidwrite_component_label(
2640 raidPtr->Disks[sparecol].dev,
2641 raidPtr->raid_cinfo[sparecol].ci_vp,
2642 &clabel);
2643 if (final == RF_FINAL_COMPONENT_UPDATE) {
2644 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2645 raidmarkclean( raidPtr->Disks[sparecol].dev,
2646 raidPtr->raid_cinfo[sparecol].ci_vp,
2647 raidPtr->mod_counter);
2648 }
2649 }
2650 }
2651 }
2652 }
2653
2654 void
2655 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2656 {
2657
2658 if (vp != NULL) {
2659 if (auto_configured == 1) {
2660 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2661 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2662 vput(vp);
2663
2664 } else {
2665 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred, curlwp);
2666 }
2667 }
2668 }
2669
2670
2671 void
2672 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2673 {
2674 int r,c;
2675 struct vnode *vp;
2676 int acd;
2677
2678
2679 /* We take this opportunity to close the vnodes like we should.. */
2680
2681 for (c = 0; c < raidPtr->numCol; c++) {
2682 vp = raidPtr->raid_cinfo[c].ci_vp;
2683 acd = raidPtr->Disks[c].auto_configured;
2684 rf_close_component(raidPtr, vp, acd);
2685 raidPtr->raid_cinfo[c].ci_vp = NULL;
2686 raidPtr->Disks[c].auto_configured = 0;
2687 }
2688
2689 for (r = 0; r < raidPtr->numSpare; r++) {
2690 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2691 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2692 rf_close_component(raidPtr, vp, acd);
2693 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2694 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2695 }
2696 }
2697
2698
2699 void
2700 rf_ReconThread(struct rf_recon_req *req)
2701 {
2702 int s;
2703 RF_Raid_t *raidPtr;
2704
2705 s = splbio();
2706 raidPtr = (RF_Raid_t *) req->raidPtr;
2707 raidPtr->recon_in_progress = 1;
2708
2709 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2710 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2711
2712 RF_Free(req, sizeof(*req));
2713
2714 raidPtr->recon_in_progress = 0;
2715 splx(s);
2716
2717 /* That's all... */
2718 kthread_exit(0); /* does not return */
2719 }
2720
2721 void
2722 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2723 {
2724 int retcode;
2725 int s;
2726
2727 raidPtr->parity_rewrite_stripes_done = 0;
2728 raidPtr->parity_rewrite_in_progress = 1;
2729 s = splbio();
2730 retcode = rf_RewriteParity(raidPtr);
2731 splx(s);
2732 if (retcode) {
2733 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2734 } else {
2735 /* set the clean bit! If we shutdown correctly,
2736 the clean bit on each component label will get
2737 set */
2738 raidPtr->parity_good = RF_RAID_CLEAN;
2739 }
2740 raidPtr->parity_rewrite_in_progress = 0;
2741
2742 /* Anyone waiting for us to stop? If so, inform them... */
2743 if (raidPtr->waitShutdown) {
2744 wakeup(&raidPtr->parity_rewrite_in_progress);
2745 }
2746
2747 /* That's all... */
2748 kthread_exit(0); /* does not return */
2749 }
2750
2751
2752 void
2753 rf_CopybackThread(RF_Raid_t *raidPtr)
2754 {
2755 int s;
2756
2757 raidPtr->copyback_in_progress = 1;
2758 s = splbio();
2759 rf_CopybackReconstructedData(raidPtr);
2760 splx(s);
2761 raidPtr->copyback_in_progress = 0;
2762
2763 /* That's all... */
2764 kthread_exit(0); /* does not return */
2765 }
2766
2767
2768 void
2769 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2770 {
2771 int s;
2772 RF_Raid_t *raidPtr;
2773
2774 s = splbio();
2775 raidPtr = req->raidPtr;
2776 raidPtr->recon_in_progress = 1;
2777 rf_ReconstructInPlace(raidPtr, req->col);
2778 RF_Free(req, sizeof(*req));
2779 raidPtr->recon_in_progress = 0;
2780 splx(s);
2781
2782 /* That's all... */
2783 kthread_exit(0); /* does not return */
2784 }
2785
2786 static RF_AutoConfig_t *
2787 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2788 const char *cname, RF_SectorCount_t size)
2789 {
2790 int good_one = 0;
2791 RF_ComponentLabel_t *clabel;
2792 RF_AutoConfig_t *ac;
2793
2794 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
2795 if (clabel == NULL) {
2796 oomem:
2797 while(ac_list) {
2798 ac = ac_list;
2799 if (ac->clabel)
2800 free(ac->clabel, M_RAIDFRAME);
2801 ac_list = ac_list->next;
2802 free(ac, M_RAIDFRAME);
2803 }
2804 printf("RAID auto config: out of memory!\n");
2805 return NULL; /* XXX probably should panic? */
2806 }
2807
2808 if (!raidread_component_label(dev, vp, clabel)) {
2809 /* Got the label. Does it look reasonable? */
2810 if (rf_reasonable_label(clabel) &&
2811 (clabel->partitionSize <= size)) {
2812 #ifdef DEBUG
2813 printf("Component on: %s: %llu\n",
2814 cname, (unsigned long long)size);
2815 rf_print_component_label(clabel);
2816 #endif
2817 /* if it's reasonable, add it, else ignore it. */
2818 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2819 M_NOWAIT);
2820 if (ac == NULL) {
2821 free(clabel, M_RAIDFRAME);
2822 goto oomem;
2823 }
2824 strlcpy(ac->devname, cname, sizeof(ac->devname));
2825 ac->dev = dev;
2826 ac->vp = vp;
2827 ac->clabel = clabel;
2828 ac->next = ac_list;
2829 ac_list = ac;
2830 good_one = 1;
2831 }
2832 }
2833 if (!good_one) {
2834 /* cleanup */
2835 free(clabel, M_RAIDFRAME);
2836 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2837 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2838 vput(vp);
2839 }
2840 return ac_list;
2841 }
2842
2843 RF_AutoConfig_t *
2844 rf_find_raid_components()
2845 {
2846 struct vnode *vp;
2847 struct disklabel label;
2848 struct device *dv;
2849 dev_t dev;
2850 int bmajor, bminor, wedge;
2851 int error;
2852 int i;
2853 RF_AutoConfig_t *ac_list;
2854
2855
2856 /* initialize the AutoConfig list */
2857 ac_list = NULL;
2858
2859 /* we begin by trolling through *all* the devices on the system */
2860
2861 for (dv = alldevs.tqh_first; dv != NULL;
2862 dv = dv->dv_list.tqe_next) {
2863
2864 /* we are only interested in disks... */
2865 if (device_class(dv) != DV_DISK)
2866 continue;
2867
2868 /* we don't care about floppies... */
2869 if (device_is_a(dv, "fd")) {
2870 continue;
2871 }
2872
2873 /* we don't care about CD's... */
2874 if (device_is_a(dv, "cd")) {
2875 continue;
2876 }
2877
2878 /* hdfd is the Atari/Hades floppy driver */
2879 if (device_is_a(dv, "hdfd")) {
2880 continue;
2881 }
2882
2883 /* fdisa is the Atari/Milan floppy driver */
2884 if (device_is_a(dv, "fdisa")) {
2885 continue;
2886 }
2887
2888 /* need to find the device_name_to_block_device_major stuff */
2889 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2890
2891 /* get a vnode for the raw partition of this disk */
2892
2893 wedge = device_is_a(dv, "dk");
2894 bminor = minor(device_unit(dv));
2895 dev = wedge ? makedev(bmajor, bminor) :
2896 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2897 if (bdevvp(dev, &vp))
2898 panic("RAID can't alloc vnode");
2899
2900 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2901
2902 if (error) {
2903 /* "Who cares." Continue looking
2904 for something that exists*/
2905 vput(vp);
2906 continue;
2907 }
2908
2909 if (wedge) {
2910 struct dkwedge_info dkw;
2911 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2912 NOCRED, 0);
2913 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2914 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2915 vput(vp);
2916 if (error) {
2917 printf("RAIDframe: can't get wedge info for "
2918 "dev %s (%d)\n", dv->dv_xname, error);
2919 continue;
2920 }
2921
2922 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
2923 continue;
2924
2925 ac_list = rf_get_component(ac_list, dev, vp,
2926 dv->dv_xname, dkw.dkw_size);
2927 continue;
2928 }
2929
2930 /* Ok, the disk exists. Go get the disklabel. */
2931 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2932 if (error) {
2933 /*
2934 * XXX can't happen - open() would
2935 * have errored out (or faked up one)
2936 */
2937 if (error != ENOTTY)
2938 printf("RAIDframe: can't get label for dev "
2939 "%s (%d)\n", dv->dv_xname, error);
2940 }
2941
2942 /* don't need this any more. We'll allocate it again
2943 a little later if we really do... */
2944 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2945 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2946 vput(vp);
2947
2948 if (error)
2949 continue;
2950
2951 for (i = 0; i < label.d_npartitions; i++) {
2952 char cname[sizeof(ac_list->devname)];
2953
2954 /* We only support partitions marked as RAID */
2955 if (label.d_partitions[i].p_fstype != FS_RAID)
2956 continue;
2957
2958 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2959 if (bdevvp(dev, &vp))
2960 panic("RAID can't alloc vnode");
2961
2962 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2963 if (error) {
2964 /* Whatever... */
2965 vput(vp);
2966 continue;
2967 }
2968 snprintf(cname, sizeof(cname), "%s%c",
2969 dv->dv_xname, 'a' + i);
2970 ac_list = rf_get_component(ac_list, dev, vp, cname,
2971 label.d_partitions[i].p_size);
2972 }
2973 }
2974 return ac_list;
2975 }
2976
2977
2978 static int
2979 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2980 {
2981
2982 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2983 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2984 ((clabel->clean == RF_RAID_CLEAN) ||
2985 (clabel->clean == RF_RAID_DIRTY)) &&
2986 clabel->row >=0 &&
2987 clabel->column >= 0 &&
2988 clabel->num_rows > 0 &&
2989 clabel->num_columns > 0 &&
2990 clabel->row < clabel->num_rows &&
2991 clabel->column < clabel->num_columns &&
2992 clabel->blockSize > 0 &&
2993 clabel->numBlocks > 0) {
2994 /* label looks reasonable enough... */
2995 return(1);
2996 }
2997 return(0);
2998 }
2999
3000
#ifdef DEBUG
/*
 * Print the contents of a component label on the console (DEBUG
 * kernels only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean = clabel->clean ? "Yes" : "No";
	const char *is_auto = clabel->autoconfigure ? "Yes" : "No";
	const char *is_root = clabel->root_partition ? "Yes" : "No";

	/* Position and geometry. */
	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* Identity and state. */
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	    is_clean, clabel->status );

	/* Layout. */
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* Autoconfiguration settings. */
	printf(" Autoconfig: %s\n", is_auto );
	printf(" Contains root partition: %s\n",
	    is_root );
	printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
3028
3029 RF_ConfigSet_t *
3030 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3031 {
3032 RF_AutoConfig_t *ac;
3033 RF_ConfigSet_t *config_sets;
3034 RF_ConfigSet_t *cset;
3035 RF_AutoConfig_t *ac_next;
3036
3037
3038 config_sets = NULL;
3039
3040 /* Go through the AutoConfig list, and figure out which components
3041 belong to what sets. */
3042 ac = ac_list;
3043 while(ac!=NULL) {
3044 /* we're going to putz with ac->next, so save it here
3045 for use at the end of the loop */
3046 ac_next = ac->next;
3047
3048 if (config_sets == NULL) {
3049 /* will need at least this one... */
3050 config_sets = (RF_ConfigSet_t *)
3051 malloc(sizeof(RF_ConfigSet_t),
3052 M_RAIDFRAME, M_NOWAIT);
3053 if (config_sets == NULL) {
3054 panic("rf_create_auto_sets: No memory!");
3055 }
3056 /* this one is easy :) */
3057 config_sets->ac = ac;
3058 config_sets->next = NULL;
3059 config_sets->rootable = 0;
3060 ac->next = NULL;
3061 } else {
3062 /* which set does this component fit into? */
3063 cset = config_sets;
3064 while(cset!=NULL) {
3065 if (rf_does_it_fit(cset, ac)) {
3066 /* looks like it matches... */
3067 ac->next = cset->ac;
3068 cset->ac = ac;
3069 break;
3070 }
3071 cset = cset->next;
3072 }
3073 if (cset==NULL) {
3074 /* didn't find a match above... new set..*/
3075 cset = (RF_ConfigSet_t *)
3076 malloc(sizeof(RF_ConfigSet_t),
3077 M_RAIDFRAME, M_NOWAIT);
3078 if (cset == NULL) {
3079 panic("rf_create_auto_sets: No memory!");
3080 }
3081 cset->ac = ac;
3082 ac->next = NULL;
3083 cset->next = config_sets;
3084 cset->rootable = 0;
3085 config_sets = cset;
3086 }
3087 }
3088 ac = ac_next;
3089 }
3090
3091
3092 return(config_sets);
3093 }
3094
3095 static int
3096 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3097 {
3098 RF_ComponentLabel_t *clabel1, *clabel2;
3099
3100 /* If this one matches the *first* one in the set, that's good
3101 enough, since the other members of the set would have been
3102 through here too... */
3103 /* note that we are not checking partitionSize here..
3104
3105 Note that we are also not checking the mod_counters here.
3106 If everything else matches execpt the mod_counter, that's
3107 good enough for this test. We will deal with the mod_counters
3108 a little later in the autoconfiguration process.
3109
3110 (clabel1->mod_counter == clabel2->mod_counter) &&
3111
3112 The reason we don't check for this is that failed disks
3113 will have lower modification counts. If those disks are
3114 not added to the set they used to belong to, then they will
3115 form their own set, which may result in 2 different sets,
3116 for example, competing to be configured at raid0, and
3117 perhaps competing to be the root filesystem set. If the
3118 wrong ones get configured, or both attempt to become /,
3119 weird behaviour and or serious lossage will occur. Thus we
3120 need to bring them into the fold here, and kick them out at
3121 a later point.
3122
3123 */
3124
3125 clabel1 = cset->ac->clabel;
3126 clabel2 = ac->clabel;
3127 if ((clabel1->version == clabel2->version) &&
3128 (clabel1->serial_number == clabel2->serial_number) &&
3129 (clabel1->num_rows == clabel2->num_rows) &&
3130 (clabel1->num_columns == clabel2->num_columns) &&
3131 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3132 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3133 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3134 (clabel1->parityConfig == clabel2->parityConfig) &&
3135 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3136 (clabel1->blockSize == clabel2->blockSize) &&
3137 (clabel1->numBlocks == clabel2->numBlocks) &&
3138 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3139 (clabel1->root_partition == clabel2->root_partition) &&
3140 (clabel1->last_unit == clabel2->last_unit) &&
3141 (clabel1->config_order == clabel2->config_order)) {
3142 /* if it get's here, it almost *has* to be a match */
3143 } else {
3144 /* it's not consistent with somebody in the set..
3145 punt */
3146 return(0);
3147 }
3148 /* all was fine.. it must fit... */
3149 return(1);
3150 }
3151
3152 int
3153 rf_have_enough_components(RF_ConfigSet_t *cset)
3154 {
3155 RF_AutoConfig_t *ac;
3156 RF_AutoConfig_t *auto_config;
3157 RF_ComponentLabel_t *clabel;
3158 int c;
3159 int num_cols;
3160 int num_missing;
3161 int mod_counter;
3162 int mod_counter_found;
3163 int even_pair_failed;
3164 char parity_type;
3165
3166
3167 /* check to see that we have enough 'live' components
3168 of this set. If so, we can configure it if necessary */
3169
3170 num_cols = cset->ac->clabel->num_columns;
3171 parity_type = cset->ac->clabel->parityConfig;
3172
3173 /* XXX Check for duplicate components!?!?!? */
3174
3175 /* Determine what the mod_counter is supposed to be for this set. */
3176
3177 mod_counter_found = 0;
3178 mod_counter = 0;
3179 ac = cset->ac;
3180 while(ac!=NULL) {
3181 if (mod_counter_found==0) {
3182 mod_counter = ac->clabel->mod_counter;
3183 mod_counter_found = 1;
3184 } else {
3185 if (ac->clabel->mod_counter > mod_counter) {
3186 mod_counter = ac->clabel->mod_counter;
3187 }
3188 }
3189 ac = ac->next;
3190 }
3191
3192 num_missing = 0;
3193 auto_config = cset->ac;
3194
3195 even_pair_failed = 0;
3196 for(c=0; c<num_cols; c++) {
3197 ac = auto_config;
3198 while(ac!=NULL) {
3199 if ((ac->clabel->column == c) &&
3200 (ac->clabel->mod_counter == mod_counter)) {
3201 /* it's this one... */
3202 #ifdef DEBUG
3203 printf("Found: %s at %d\n",
3204 ac->devname,c);
3205 #endif
3206 break;
3207 }
3208 ac=ac->next;
3209 }
3210 if (ac==NULL) {
3211 /* Didn't find one here! */
3212 /* special case for RAID 1, especially
3213 where there are more than 2
3214 components (where RAIDframe treats
3215 things a little differently :( ) */
3216 if (parity_type == '1') {
3217 if (c%2 == 0) { /* even component */
3218 even_pair_failed = 1;
3219 } else { /* odd component. If
3220 we're failed, and
3221 so is the even
3222 component, it's
3223 "Good Night, Charlie" */
3224 if (even_pair_failed == 1) {
3225 return(0);
3226 }
3227 }
3228 } else {
3229 /* normal accounting */
3230 num_missing++;
3231 }
3232 }
3233 if ((parity_type == '1') && (c%2 == 1)) {
3234 /* Just did an even component, and we didn't
3235 bail.. reset the even_pair_failed flag,
3236 and go on to the next component.... */
3237 even_pair_failed = 0;
3238 }
3239 }
3240
3241 clabel = cset->ac->clabel;
3242
3243 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3244 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3245 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3246 /* XXX this needs to be made *much* more general */
3247 /* Too many failures */
3248 return(0);
3249 }
3250 /* otherwise, all is well, and we've got enough to take a kick
3251 at autoconfiguring this set */
3252 return(1);
3253 }
3254
3255 void
3256 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3257 RF_Raid_t *raidPtr)
3258 {
3259 RF_ComponentLabel_t *clabel;
3260 int i;
3261
3262 clabel = ac->clabel;
3263
3264 /* 1. Fill in the common stuff */
3265 config->numRow = clabel->num_rows = 1;
3266 config->numCol = clabel->num_columns;
3267 config->numSpare = 0; /* XXX should this be set here? */
3268 config->sectPerSU = clabel->sectPerSU;
3269 config->SUsPerPU = clabel->SUsPerPU;
3270 config->SUsPerRU = clabel->SUsPerRU;
3271 config->parityConfig = clabel->parityConfig;
3272 /* XXX... */
3273 strcpy(config->diskQueueType,"fifo");
3274 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3275 config->layoutSpecificSize = 0; /* XXX ?? */
3276
3277 while(ac!=NULL) {
3278 /* row/col values will be in range due to the checks
3279 in reasonable_label() */
3280 strcpy(config->devnames[0][ac->clabel->column],
3281 ac->devname);
3282 ac = ac->next;
3283 }
3284
3285 for(i=0;i<RF_MAXDBGV;i++) {
3286 config->debugVars[i][0] = 0;
3287 }
3288 }
3289
3290 int
3291 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3292 {
3293 RF_ComponentLabel_t clabel;
3294 struct vnode *vp;
3295 dev_t dev;
3296 int column;
3297 int sparecol;
3298
3299 raidPtr->autoconfigure = new_value;
3300
3301 for(column=0; column<raidPtr->numCol; column++) {
3302 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3303 dev = raidPtr->Disks[column].dev;
3304 vp = raidPtr->raid_cinfo[column].ci_vp;
3305 raidread_component_label(dev, vp, &clabel);
3306 clabel.autoconfigure = new_value;
3307 raidwrite_component_label(dev, vp, &clabel);
3308 }
3309 }
3310 for(column = 0; column < raidPtr->numSpare ; column++) {
3311 sparecol = raidPtr->numCol + column;
3312 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3313 dev = raidPtr->Disks[sparecol].dev;
3314 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3315 raidread_component_label(dev, vp, &clabel);
3316 clabel.autoconfigure = new_value;
3317 raidwrite_component_label(dev, vp, &clabel);
3318 }
3319 }
3320 return(new_value);
3321 }
3322
3323 int
3324 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3325 {
3326 RF_ComponentLabel_t clabel;
3327 struct vnode *vp;
3328 dev_t dev;
3329 int column;
3330 int sparecol;
3331
3332 raidPtr->root_partition = new_value;
3333 for(column=0; column<raidPtr->numCol; column++) {
3334 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3335 dev = raidPtr->Disks[column].dev;
3336 vp = raidPtr->raid_cinfo[column].ci_vp;
3337 raidread_component_label(dev, vp, &clabel);
3338 clabel.root_partition = new_value;
3339 raidwrite_component_label(dev, vp, &clabel);
3340 }
3341 }
3342 for(column = 0; column < raidPtr->numSpare ; column++) {
3343 sparecol = raidPtr->numCol + column;
3344 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3345 dev = raidPtr->Disks[sparecol].dev;
3346 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3347 raidread_component_label(dev, vp, &clabel);
3348 clabel.root_partition = new_value;
3349 raidwrite_component_label(dev, vp, &clabel);
3350 }
3351 }
3352 return(new_value);
3353 }
3354
3355 void
3356 rf_release_all_vps(RF_ConfigSet_t *cset)
3357 {
3358 RF_AutoConfig_t *ac;
3359
3360 ac = cset->ac;
3361 while(ac!=NULL) {
3362 /* Close the vp, and give it back */
3363 if (ac->vp) {
3364 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3365 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3366 vput(ac->vp);
3367 ac->vp = NULL;
3368 }
3369 ac = ac->next;
3370 }
3371 }
3372
3373
3374 void
3375 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3376 {
3377 RF_AutoConfig_t *ac;
3378 RF_AutoConfig_t *next_ac;
3379
3380 ac = cset->ac;
3381 while(ac!=NULL) {
3382 next_ac = ac->next;
3383 /* nuke the label */
3384 free(ac->clabel, M_RAIDFRAME);
3385 /* cleanup the config structure */
3386 free(ac, M_RAIDFRAME);
3387 /* "next.." */
3388 ac = next_ac;
3389 }
3390 /* and, finally, nuke the config set */
3391 free(cset, M_RAIDFRAME);
3392 }
3393
3394
3395 void
3396 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3397 {
3398 /* current version number */
3399 clabel->version = RF_COMPONENT_LABEL_VERSION;
3400 clabel->serial_number = raidPtr->serial_number;
3401 clabel->mod_counter = raidPtr->mod_counter;
3402 clabel->num_rows = 1;
3403 clabel->num_columns = raidPtr->numCol;
3404 clabel->clean = RF_RAID_DIRTY; /* not clean */
3405 clabel->status = rf_ds_optimal; /* "It's good!" */
3406
3407 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3408 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3409 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3410
3411 clabel->blockSize = raidPtr->bytesPerSector;
3412 clabel->numBlocks = raidPtr->sectorsPerDisk;
3413
3414 /* XXX not portable */
3415 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3416 clabel->maxOutstanding = raidPtr->maxOutstanding;
3417 clabel->autoconfigure = raidPtr->autoconfigure;
3418 clabel->root_partition = raidPtr->root_partition;
3419 clabel->last_unit = raidPtr->raidid;
3420 clabel->config_order = raidPtr->config_order;
3421 }
3422
3423 int
3424 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3425 {
3426 RF_Raid_t *raidPtr;
3427 RF_Config_t *config;
3428 int raidID;
3429 int retcode;
3430
3431 #ifdef DEBUG
3432 printf("RAID autoconfigure\n");
3433 #endif
3434
3435 retcode = 0;
3436 *unit = -1;
3437
3438 /* 1. Create a config structure */
3439
3440 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3441 M_RAIDFRAME,
3442 M_NOWAIT);
3443 if (config==NULL) {
3444 printf("Out of mem!?!?\n");
3445 /* XXX do something more intelligent here. */
3446 return(1);
3447 }
3448
3449 memset(config, 0, sizeof(RF_Config_t));
3450
3451 /*
3452 2. Figure out what RAID ID this one is supposed to live at
3453 See if we can get the same RAID dev that it was configured
3454 on last time..
3455 */
3456
3457 raidID = cset->ac->clabel->last_unit;
3458 if ((raidID < 0) || (raidID >= numraid)) {
3459 /* let's not wander off into lala land. */
3460 raidID = numraid - 1;
3461 }
3462 if (raidPtrs[raidID]->valid != 0) {
3463
3464 /*
3465 Nope... Go looking for an alternative...
3466 Start high so we don't immediately use raid0 if that's
3467 not taken.
3468 */
3469
3470 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3471 if (raidPtrs[raidID]->valid == 0) {
3472 /* can use this one! */
3473 break;
3474 }
3475 }
3476 }
3477
3478 if (raidID < 0) {
3479 /* punt... */
3480 printf("Unable to auto configure this set!\n");
3481 printf("(Out of RAID devs!)\n");
3482 free(config, M_RAIDFRAME);
3483 return(1);
3484 }
3485
3486 #ifdef DEBUG
3487 printf("Configuring raid%d:\n",raidID);
3488 #endif
3489
3490 raidPtr = raidPtrs[raidID];
3491
3492 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3493 raidPtr->raidid = raidID;
3494 raidPtr->openings = RAIDOUTSTANDING;
3495
3496 /* 3. Build the configuration structure */
3497 rf_create_configuration(cset->ac, config, raidPtr);
3498
3499 /* 4. Do the configuration */
3500 retcode = rf_Configure(raidPtr, config, cset->ac);
3501
3502 if (retcode == 0) {
3503
3504 raidinit(raidPtrs[raidID]);
3505
3506 rf_markalldirty(raidPtrs[raidID]);
3507 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3508 if (cset->ac->clabel->root_partition==1) {
3509 /* everything configured just fine. Make a note
3510 that this set is eligible to be root. */
3511 cset->rootable = 1;
3512 /* XXX do this here? */
3513 raidPtrs[raidID]->root_partition = 1;
3514 }
3515 }
3516
3517 /* 5. Cleanup */
3518 free(config, M_RAIDFRAME);
3519
3520 *unit = raidID;
3521 return(retcode);
3522 }
3523
3524 void
3525 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3526 {
3527 struct buf *bp;
3528
3529 bp = (struct buf *)desc->bp;
3530 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3531 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3532 }
3533
3534 void
3535 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3536 size_t xmin, size_t xmax)
3537 {
3538 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3539 pool_sethiwat(p, xmax);
3540 pool_prime(p, xmin);
3541 pool_setlowat(p, xmin);
3542 }
3543
3544 /*
3545 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3546 * if there is IO pending and if that IO could possibly be done for a
3547 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3548 * otherwise.
3549 *
3550 */
3551
3552 int
3553 rf_buf_queue_check(int raidid)
3554 {
3555 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3556 raidPtrs[raidid]->openings > 0) {
3557 /* there is work to do */
3558 return 0;
3559 }
3560 /* default is nothing to do */
3561 return 1;
3562 }
3563
3564 int
3565 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3566 {
3567 struct partinfo dpart;
3568 struct dkwedge_info dkw;
3569 int error;
3570
3571 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
3572 if (error == 0) {
3573 diskPtr->blockSize = dpart.disklab->d_secsize;
3574 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
3575 diskPtr->partitionSize = dpart.part->p_size;
3576 return 0;
3577 }
3578
3579 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
3580 if (error == 0) {
3581 diskPtr->blockSize = 512; /* XXX */
3582 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
3583 diskPtr->partitionSize = dkw.dkw_size;
3584 return 0;
3585 }
3586 return error;
3587 }
3588
/*
 * raid_match -- match routine for the raid device; it reports a match
 * (1) unconditionally and looks at none of its arguments.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata, void *aux)
{

	return 1;
}
3595
/*
 * raid_attach -- attach routine for the raid device.  Intentionally
 * empty: no work is done here.
 */
static void
raid_attach(struct device *parent, struct device *self, void *aux)
{

	/* nothing to do */
}
3602
3603
3604 static int
3605 raid_detach(struct device *self, int flags)
3606 {
3607 struct raid_softc *rs = (struct raid_softc *)self;
3608
3609 if (rs->sc_flags & RAIDF_INITED)
3610 return EBUSY;
3611
3612 return 0;
3613 }
3614
3615 static void
3616 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
3617 {
3618 prop_dictionary_t disk_info, odisk_info, geom;
3619 disk_info = prop_dictionary_create();
3620 geom = prop_dictionary_create();
3621 prop_dictionary_set_uint64(geom, "sectors-per-unit",
3622 raidPtr->totalSectors);
3623 prop_dictionary_set_uint32(geom, "sector-size",
3624 raidPtr->bytesPerSector);
3625
3626 prop_dictionary_set_uint16(geom, "sectors-per-track",
3627 raidPtr->Layout.dataSectorsPerStripe);
3628 prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
3629 4 * raidPtr->numCol);
3630
3631 prop_dictionary_set_uint64(geom, "cylinders-per-unit",
3632 raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
3633 (4 * raidPtr->numCol)));
3634
3635 prop_dictionary_set(disk_info, "geometry", geom);
3636 prop_object_release(geom);
3637 prop_dictionary_set(device_properties(rs->sc_dev),
3638 "disk-info", disk_info);
3639 odisk_info = rs->sc_dkdev.dk_info;
3640 rs->sc_dkdev.dk_info = disk_info;
3641 if (odisk_info)
3642 prop_object_release(odisk_info);
3643 }
3644