rf_netbsdkintf.c revision 1.186.2.4 1 /* $NetBSD: rf_netbsdkintf.c,v 1.186.2.4 2007/12/19 18:31:14 ghen Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.186.2.4 2007/12/19 18:31:14 ghen Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171
172 #include <dev/raidframe/raidframevar.h>
173 #include <dev/raidframe/raidframeio.h>
174 #include "raid.h"
175 #include "opt_raid_autoconfig.h"
176 #include "rf_raid.h"
177 #include "rf_copyback.h"
178 #include "rf_dag.h"
179 #include "rf_dagflags.h"
180 #include "rf_desc.h"
181 #include "rf_diskqueue.h"
182 #include "rf_etimer.h"
183 #include "rf_general.h"
184 #include "rf_kintf.h"
185 #include "rf_options.h"
186 #include "rf_driver.h"
187 #include "rf_parityscan.h"
188 #include "rf_threadstuff.h"
189
190 #ifdef DEBUG
191 int rf_kdebug_level = 0;
192 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
193 #else /* DEBUG */
194 #define db1_printf(a) { }
195 #endif /* DEBUG */
196
197 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
198
199 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
200
201 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
202 * spare table */
203 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
204 * installation process */
205
206 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
207
208 /* prototypes */
209 static void KernelWakeupFunc(struct buf *);
210 static void InitBP(struct buf *, struct vnode *, unsigned,
211 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
212 void *, int, struct proc *);
213 static void raidinit(RF_Raid_t *);
214
215 void raidattach(int);
216
217 dev_type_open(raidopen);
218 dev_type_close(raidclose);
219 dev_type_read(raidread);
220 dev_type_write(raidwrite);
221 dev_type_ioctl(raidioctl);
222 dev_type_strategy(raidstrategy);
223 dev_type_dump(raiddump);
224 dev_type_size(raidsize);
225
226 const struct bdevsw raid_bdevsw = {
227 raidopen, raidclose, raidstrategy, raidioctl,
228 raiddump, raidsize, D_DISK
229 };
230
231 const struct cdevsw raid_cdevsw = {
232 raidopen, raidclose, raidread, raidwrite, raidioctl,
233 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
234 };
235
236 /*
237 * Pilfered from ccd.c
238 */
239
/*
 * Per-I/O wrapper buffer, allocated from the rf_pools.cbuf pool.
 * Links the component-level struct buf back to the original request.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!!  (the
				 * pool item is cast to a struct buf *) */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
245
246 /* XXX Not sure if the following should be replacing the raidPtrs above,
247 or if it should be used in conjunction with that...
248 */
249
/* Per-unit driver state; one entry per unit in the raid_softc[] array. */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device, in DEV_BSIZE blocks */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
264
265 #define raidunit(x) DISKUNIT(x)
266 int numraid = 0;
267
268 /*
269 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
270 * Be aware that large numbers can allow the driver to consume a lot of
271 * kernel memory, especially on writes, and in degraded mode reads.
272 *
273 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
274 * a single 64K write will typically require 64K for the old data,
275 * 64K for the old parity, and 64K for the new parity, for a total
276 * of 192K (if the parity buffer is not re-used immediately).
277 * Even it if is used immediately, that's still 128K, which when multiplied
278 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
279 *
280 * Now in degraded mode, for example, a 64K read on the above setup may
281 * require data reconstruction, which will require *all* of the 4 remaining
282 * disks to participate -- 4 * 32K/disk == 128K again.
283 */
284
285 #ifndef RAIDOUTSTANDING
286 #define RAIDOUTSTANDING 6
287 #endif
288
289 #define RAIDLABELDEV(dev) \
290 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
291
292 /* declared here, and made public, for the benefit of KVM stuff.. */
293 struct raid_softc *raid_softc;
294
295 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
296 struct disklabel *);
297 static void raidgetdisklabel(dev_t);
298 static void raidmakedisklabel(struct raid_softc *);
299
300 static int raidlock(struct raid_softc *);
301 static void raidunlock(struct raid_softc *);
302
303 static void rf_markalldirty(RF_Raid_t *);
304
305 struct device *raidrootdev;
306
307 void rf_ReconThread(struct rf_recon_req *);
308 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
309 void rf_CopybackThread(RF_Raid_t *raidPtr);
310 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
311 int rf_autoconfig(struct device *self);
312 void rf_buildroothack(RF_ConfigSet_t *);
313
314 RF_AutoConfig_t *rf_find_raid_components(void);
315 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
316 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
317 static int rf_reasonable_label(RF_ComponentLabel_t *);
318 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
319 int rf_set_autoconfig(RF_Raid_t *, int);
320 int rf_set_rootpartition(RF_Raid_t *, int);
321 void rf_release_all_vps(RF_ConfigSet_t *);
322 void rf_cleanup_config_set(RF_ConfigSet_t *);
323 int rf_have_enough_components(RF_ConfigSet_t *);
324 int rf_auto_config_set(RF_ConfigSet_t *, int *);
325
326 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
327 allow autoconfig to take place.
328 Note that this is overridden by having
329 RAID_AUTOCONFIG as an option in the
330 kernel config file. */
331
332 struct RF_Pools_s rf_pools;
333
/*
 * raidattach: pseudo-device attach routine, called at boot with the
 * number of "raid" units to support.  Allocates the global per-unit
 * tables (raidPtrs, raid_softc, raidrootdev), boots the RAIDframe
 * core, and registers a config finalizer so auto-configuration runs
 * once all real hardware devices have attached.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	/* Initialize the component buffer pool. */
	rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf),
		     "raidpl", num * RAIDOUTSTANDING,
		     2 * num * RAIDOUTSTANDING);

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);

		/* Fake up a struct device per unit so an autoconfigured
		 * set can later be nominated as booted_device (see
		 * rf_buildroothack). */
		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		snprintf(raidrootdev[raidID].dv_xname,
		    sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Shrink numraid so later entry points never
			 * touch the units we failed to allocate. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
428
429 int
430 rf_autoconfig(struct device *self)
431 {
432 RF_AutoConfig_t *ac_list;
433 RF_ConfigSet_t *config_sets;
434
435 if (raidautoconfig == 0)
436 return (0);
437
438 /* XXX This code can only be run once. */
439 raidautoconfig = 0;
440
441 /* 1. locate all RAID components on the system */
442 #ifdef DEBUG
443 printf("Searching for RAID components...\n");
444 #endif
445 ac_list = rf_find_raid_components();
446
447 /* 2. Sort them into their respective sets. */
448 config_sets = rf_create_auto_sets(ac_list);
449
450 /*
451 * 3. Evaluate each set andconfigure the valid ones.
452 * This gets done in rf_buildroothack().
453 */
454 rf_buildroothack(config_sets);
455
456 return (1);
457 }
458
459 void
460 rf_buildroothack(RF_ConfigSet_t *config_sets)
461 {
462 RF_ConfigSet_t *cset;
463 RF_ConfigSet_t *next_cset;
464 int retcode;
465 int raidID;
466 int rootID;
467 int num_root;
468
469 rootID = 0;
470 num_root = 0;
471 cset = config_sets;
472 while(cset != NULL ) {
473 next_cset = cset->next;
474 if (rf_have_enough_components(cset) &&
475 cset->ac->clabel->autoconfigure==1) {
476 retcode = rf_auto_config_set(cset,&raidID);
477 if (!retcode) {
478 if (cset->rootable) {
479 rootID = raidID;
480 num_root++;
481 }
482 } else {
483 /* The autoconfig didn't work :( */
484 #if DEBUG
485 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
486 #endif
487 rf_release_all_vps(cset);
488 }
489 } else {
490 /* we're not autoconfiguring this set...
491 release the associated resources */
492 rf_release_all_vps(cset);
493 }
494 /* cleanup */
495 rf_cleanup_config_set(cset);
496 cset = next_cset;
497 }
498
499 /* we found something bootable... */
500
501 if (num_root == 1) {
502 booted_device = &raidrootdev[rootID];
503 } else if (num_root > 1) {
504 /* we can't guess.. require the user to answer... */
505 boothowto |= RB_ASKNAME;
506 }
507 }
508
509
510 int
511 raidsize(dev_t dev)
512 {
513 struct raid_softc *rs;
514 struct disklabel *lp;
515 int part, unit, omask, size;
516
517 unit = raidunit(dev);
518 if (unit >= numraid)
519 return (-1);
520 rs = &raid_softc[unit];
521
522 if ((rs->sc_flags & RAIDF_INITED) == 0)
523 return (-1);
524
525 part = DISKPART(dev);
526 omask = rs->sc_dkdev.dk_openmask & (1 << part);
527 lp = rs->sc_dkdev.dk_label;
528
529 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
530 return (-1);
531
532 if (lp->d_partitions[part].p_fstype != FS_SWAP)
533 size = -1;
534 else
535 size = lp->d_partitions[part].p_size *
536 (lp->d_secsize / DEV_BSIZE);
537
538 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
539 return (-1);
540
541 return (size);
542
543 }
544
/*
 * raiddump: crash-dump entry point.  Writes `size' bytes at `va' to
 * block `blkno' of the dump partition.  Only RAID 1 sets (one data
 * column, one parity column) are supported; the dump goes to a single
 * live component, chosen by the preference order described below.
 */
int
raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* The dump must be a whole number of DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Refuse to write past the end of the RAID device. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%zu)\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one?  Find which column (if
			 * any) this spare is standing in for. */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	*/

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
682 /* ARGSUSED */
/*
 * raidopen: open entry point for both the block and character device.
 * Validates the partition, records the open in the per-format open
 * masks, and marks the components dirty on the first open of a
 * configured set (so an unclean shutdown is detectable).
 */
int
raidopen(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* Refresh the in-core disklabel on the very first open of a
	 * configured unit. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* Recompute the combined open mask from the per-format masks. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
752 /* ARGSUSED */
/*
 * raidclose: close entry point.  Clears the partition bit from the
 * appropriate open mask; on the last close of a configured unit the
 * component labels are marked clean, and if the system is shutting
 * down the RAID set itself is shut down and detached.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	/* NOTE(review): `error' from rf_Shutdown() above is collected but
	   discarded here -- the function always returns 0.  Confirm whether
	   the shutdown status should be propagated to the caller. */
	return (0);

}
809
810 void
811 raidstrategy(struct buf *bp)
812 {
813 int s;
814
815 unsigned int raidID = raidunit(bp->b_dev);
816 RF_Raid_t *raidPtr;
817 struct raid_softc *rs = &raid_softc[raidID];
818 int wlabel;
819
820 if ((rs->sc_flags & RAIDF_INITED) ==0) {
821 bp->b_error = ENXIO;
822 bp->b_flags |= B_ERROR;
823 bp->b_resid = bp->b_bcount;
824 biodone(bp);
825 return;
826 }
827 if (raidID >= numraid || !raidPtrs[raidID]) {
828 bp->b_error = ENODEV;
829 bp->b_flags |= B_ERROR;
830 bp->b_resid = bp->b_bcount;
831 biodone(bp);
832 return;
833 }
834 raidPtr = raidPtrs[raidID];
835 if (!raidPtr->valid) {
836 bp->b_error = ENODEV;
837 bp->b_flags |= B_ERROR;
838 bp->b_resid = bp->b_bcount;
839 biodone(bp);
840 return;
841 }
842 if (bp->b_bcount == 0) {
843 db1_printf(("b_bcount is zero..\n"));
844 biodone(bp);
845 return;
846 }
847
848 /*
849 * Do bounds checking and adjust transfer. If there's an
850 * error, the bounds check will flag that for us.
851 */
852
853 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
854 if (DISKPART(bp->b_dev) != RAW_PART)
855 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
856 db1_printf(("Bounds check failed!!:%d %d\n",
857 (int) bp->b_blkno, (int) wlabel));
858 biodone(bp);
859 return;
860 }
861 s = splbio();
862
863 bp->b_resid = 0;
864
865 /* stuff it onto our queue */
866 BUFQ_PUT(&rs->buf_queue, bp);
867
868 /* scheduled the IO to happen at the next convenient time */
869 wakeup(&(raidPtrs[raidID]->iodone));
870
871 splx(s);
872 }
873 /* ARGSUSED */
874 int
875 raidread(dev_t dev, struct uio *uio, int flags)
876 {
877 int unit = raidunit(dev);
878 struct raid_softc *rs;
879
880 if (unit >= numraid)
881 return (ENXIO);
882 rs = &raid_softc[unit];
883
884 if ((rs->sc_flags & RAIDF_INITED) == 0)
885 return (ENXIO);
886
887 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
888
889 }
890 /* ARGSUSED */
891 int
892 raidwrite(dev_t dev, struct uio *uio, int flags)
893 {
894 int unit = raidunit(dev);
895 struct raid_softc *rs;
896
897 if (unit >= numraid)
898 return (ENXIO);
899 rs = &raid_softc[unit];
900
901 if ((rs->sc_flags & RAIDF_INITED) == 0)
902 return (ENXIO);
903
904 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
905
906 }
907
908 int
909 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
910 {
911 int unit = raidunit(dev);
912 int error = 0;
913 int part, pmask;
914 struct raid_softc *rs;
915 RF_Config_t *k_cfg, *u_cfg;
916 RF_Raid_t *raidPtr;
917 RF_RaidDisk_t *diskPtr;
918 RF_AccTotals_t *totals;
919 RF_DeviceConfig_t *d_cfg, **ucfgp;
920 u_char *specific_buf;
921 int retcode = 0;
922 int column;
923 int raidid;
924 struct rf_recon_req *rrcopy, *rr;
925 RF_ComponentLabel_t *clabel;
926 RF_ComponentLabel_t ci_label;
927 RF_ComponentLabel_t **clabel_ptr;
928 RF_SingleComponent_t *sparePtr,*componentPtr;
929 RF_SingleComponent_t hot_spare;
930 RF_SingleComponent_t component;
931 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
932 int i, j, d;
933 #ifdef __HAVE_OLD_DISKLABEL
934 struct disklabel newlabel;
935 #endif
936
937 if (unit >= numraid)
938 return (ENXIO);
939 rs = &raid_softc[unit];
940 raidPtr = raidPtrs[unit];
941
942 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
943 (int) DISKPART(dev), (int) unit, (int) cmd));
944
945 /* Must be open for writes for these commands... */
946 switch (cmd) {
947 case DIOCSDINFO:
948 case DIOCWDINFO:
949 #ifdef __HAVE_OLD_DISKLABEL
950 case ODIOCWDINFO:
951 case ODIOCSDINFO:
952 #endif
953 case DIOCWLABEL:
954 if ((flag & FWRITE) == 0)
955 return (EBADF);
956 }
957
958 /* Must be initialized for these... */
959 switch (cmd) {
960 case DIOCGDINFO:
961 case DIOCSDINFO:
962 case DIOCWDINFO:
963 #ifdef __HAVE_OLD_DISKLABEL
964 case ODIOCGDINFO:
965 case ODIOCWDINFO:
966 case ODIOCSDINFO:
967 case ODIOCGDEFLABEL:
968 #endif
969 case DIOCGPART:
970 case DIOCWLABEL:
971 case DIOCGDEFLABEL:
972 case RAIDFRAME_SHUTDOWN:
973 case RAIDFRAME_REWRITEPARITY:
974 case RAIDFRAME_GET_INFO:
975 case RAIDFRAME_RESET_ACCTOTALS:
976 case RAIDFRAME_GET_ACCTOTALS:
977 case RAIDFRAME_KEEP_ACCTOTALS:
978 case RAIDFRAME_GET_SIZE:
979 case RAIDFRAME_FAIL_DISK:
980 case RAIDFRAME_COPYBACK:
981 case RAIDFRAME_CHECK_RECON_STATUS:
982 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
983 case RAIDFRAME_GET_COMPONENT_LABEL:
984 case RAIDFRAME_SET_COMPONENT_LABEL:
985 case RAIDFRAME_ADD_HOT_SPARE:
986 case RAIDFRAME_REMOVE_HOT_SPARE:
987 case RAIDFRAME_INIT_LABELS:
988 case RAIDFRAME_REBUILD_IN_PLACE:
989 case RAIDFRAME_CHECK_PARITY:
990 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
991 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
992 case RAIDFRAME_CHECK_COPYBACK_STATUS:
993 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
994 case RAIDFRAME_SET_AUTOCONFIG:
995 case RAIDFRAME_SET_ROOT:
996 case RAIDFRAME_DELETE_COMPONENT:
997 case RAIDFRAME_INCORPORATE_HOT_SPARE:
998 if ((rs->sc_flags & RAIDF_INITED) == 0)
999 return (ENXIO);
1000 }
1001
1002 switch (cmd) {
1003
1004 /* configure the system */
1005 case RAIDFRAME_CONFIGURE:
1006
1007 if (raidPtr->valid) {
1008 /* There is a valid RAID set running on this unit! */
1009 printf("raid%d: Device already configured!\n",unit);
1010 return(EINVAL);
1011 }
1012
1013 /* copy-in the configuration information */
1014 /* data points to a pointer to the configuration structure */
1015
1016 u_cfg = *((RF_Config_t **) data);
1017 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1018 if (k_cfg == NULL) {
1019 return (ENOMEM);
1020 }
1021 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1022 if (retcode) {
1023 RF_Free(k_cfg, sizeof(RF_Config_t));
1024 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1025 retcode));
1026 return (retcode);
1027 }
1028 /* allocate a buffer for the layout-specific data, and copy it
1029 * in */
1030 if (k_cfg->layoutSpecificSize) {
1031 if (k_cfg->layoutSpecificSize > 10000) {
1032 /* sanity check */
1033 RF_Free(k_cfg, sizeof(RF_Config_t));
1034 return (EINVAL);
1035 }
1036 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1037 (u_char *));
1038 if (specific_buf == NULL) {
1039 RF_Free(k_cfg, sizeof(RF_Config_t));
1040 return (ENOMEM);
1041 }
1042 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1043 k_cfg->layoutSpecificSize);
1044 if (retcode) {
1045 RF_Free(k_cfg, sizeof(RF_Config_t));
1046 RF_Free(specific_buf,
1047 k_cfg->layoutSpecificSize);
1048 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1049 retcode));
1050 return (retcode);
1051 }
1052 } else
1053 specific_buf = NULL;
1054 k_cfg->layoutSpecific = specific_buf;
1055
1056 /* should do some kind of sanity check on the configuration.
1057 * Store the sum of all the bytes in the last byte? */
1058
1059 /* configure the system */
1060
1061 /*
1062 * Clear the entire RAID descriptor, just to make sure
1063 * there is no stale data left in the case of a
1064 * reconfiguration
1065 */
1066 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1067 raidPtr->raidid = unit;
1068
1069 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1070
1071 if (retcode == 0) {
1072
1073 /* allow this many simultaneous IO's to
1074 this RAID device */
1075 raidPtr->openings = RAIDOUTSTANDING;
1076
1077 raidinit(raidPtr);
1078 rf_markalldirty(raidPtr);
1079 }
1080 /* free the buffers. No return code here. */
1081 if (k_cfg->layoutSpecificSize) {
1082 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1083 }
1084 RF_Free(k_cfg, sizeof(RF_Config_t));
1085
1086 return (retcode);
1087
1088 /* shutdown the system */
1089 case RAIDFRAME_SHUTDOWN:
1090
1091 if ((error = raidlock(rs)) != 0)
1092 return (error);
1093
1094 /*
1095 * If somebody has a partition mounted, we shouldn't
1096 * shutdown.
1097 */
1098
1099 part = DISKPART(dev);
1100 pmask = (1 << part);
1101 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1102 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1103 (rs->sc_dkdev.dk_copenmask & pmask))) {
1104 raidunlock(rs);
1105 return (EBUSY);
1106 }
1107
1108 retcode = rf_Shutdown(raidPtr);
1109
1110 /* It's no longer initialized... */
1111 rs->sc_flags &= ~RAIDF_INITED;
1112
1113 /* Detach the disk. */
1114 disk_detach(&rs->sc_dkdev);
1115
1116 raidunlock(rs);
1117
1118 return (retcode);
1119 case RAIDFRAME_GET_COMPONENT_LABEL:
1120 clabel_ptr = (RF_ComponentLabel_t **) data;
1121 /* need to read the component label for the disk indicated
1122 by row,column in clabel */
1123
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
1126 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1127 (RF_ComponentLabel_t *));
1128 if (clabel == NULL)
1129 return (ENOMEM);
1130
1131 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1132
1133 retcode = copyin( *clabel_ptr, clabel,
1134 sizeof(RF_ComponentLabel_t));
1135
1136 if (retcode) {
1137 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1138 return(retcode);
1139 }
1140
1141 clabel->row = 0; /* Don't allow looking at anything else.*/
1142
1143 column = clabel->column;
1144
1145 if ((column < 0) || (column >= raidPtr->numCol +
1146 raidPtr->numSpare)) {
1147 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1148 return(EINVAL);
1149 }
1150
1151 raidread_component_label(raidPtr->Disks[column].dev,
1152 raidPtr->raid_cinfo[column].ci_vp,
1153 clabel );
1154
1155 retcode = copyout(clabel, *clabel_ptr,
1156 sizeof(RF_ComponentLabel_t));
1157 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1158 return (retcode);
1159
1160 case RAIDFRAME_SET_COMPONENT_LABEL:
1161 clabel = (RF_ComponentLabel_t *) data;
1162
1163 /* XXX check the label for valid stuff... */
1164 /* Note that some things *should not* get modified --
1165 the user should be re-initing the labels instead of
1166 trying to patch things.
1167 */
1168
1169 raidid = raidPtr->raidid;
1170 #if DEBUG
1171 printf("raid%d: Got component label:\n", raidid);
1172 printf("raid%d: Version: %d\n", raidid, clabel->version);
1173 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1174 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1175 printf("raid%d: Column: %d\n", raidid, clabel->column);
1176 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1177 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1178 printf("raid%d: Status: %d\n", raidid, clabel->status);
1179 #endif
1180 clabel->row = 0;
1181 column = clabel->column;
1182
1183 if ((column < 0) || (column >= raidPtr->numCol)) {
1184 return(EINVAL);
1185 }
1186
1187 /* XXX this isn't allowed to do anything for now :-) */
1188
1189 /* XXX and before it is, we need to fill in the rest
1190 of the fields!?!?!?! */
1191 #if 0
1192 raidwrite_component_label(
1193 raidPtr->Disks[column].dev,
1194 raidPtr->raid_cinfo[column].ci_vp,
1195 clabel );
1196 #endif
1197 return (0);
1198
1199 case RAIDFRAME_INIT_LABELS:
1200 clabel = (RF_ComponentLabel_t *) data;
1201 /*
1202 we only want the serial number from
1203 the above. We get all the rest of the information
1204 from the config that was used to create this RAID
1205 set.
1206 */
1207
1208 raidPtr->serial_number = clabel->serial_number;
1209
1210 raid_init_component_label(raidPtr, &ci_label);
1211 ci_label.serial_number = clabel->serial_number;
1212 ci_label.row = 0; /* we dont' pretend to support more */
1213
1214 for(column=0;column<raidPtr->numCol;column++) {
1215 diskPtr = &raidPtr->Disks[column];
1216 if (!RF_DEAD_DISK(diskPtr->status)) {
1217 ci_label.partitionSize = diskPtr->partitionSize;
1218 ci_label.column = column;
1219 raidwrite_component_label(
1220 raidPtr->Disks[column].dev,
1221 raidPtr->raid_cinfo[column].ci_vp,
1222 &ci_label );
1223 }
1224 }
1225
1226 return (retcode);
1227 case RAIDFRAME_SET_AUTOCONFIG:
1228 d = rf_set_autoconfig(raidPtr, *(int *) data);
1229 printf("raid%d: New autoconfig value is: %d\n",
1230 raidPtr->raidid, d);
1231 *(int *) data = d;
1232 return (retcode);
1233
1234 case RAIDFRAME_SET_ROOT:
1235 d = rf_set_rootpartition(raidPtr, *(int *) data);
1236 printf("raid%d: New rootpartition value is: %d\n",
1237 raidPtr->raidid, d);
1238 *(int *) data = d;
1239 return (retcode);
1240
1241 /* initialize all parity */
1242 case RAIDFRAME_REWRITEPARITY:
1243
1244 if (raidPtr->Layout.map->faultsTolerated == 0) {
1245 /* Parity for RAID 0 is trivially correct */
1246 raidPtr->parity_good = RF_RAID_CLEAN;
1247 return(0);
1248 }
1249
1250 if (raidPtr->parity_rewrite_in_progress == 1) {
1251 /* Re-write is already in progress! */
1252 return(EINVAL);
1253 }
1254
1255 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1256 rf_RewriteParityThread,
1257 raidPtr,"raid_parity");
1258 return (retcode);
1259
1260
1261 case RAIDFRAME_ADD_HOT_SPARE:
1262 sparePtr = (RF_SingleComponent_t *) data;
1263 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1264 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1265 return(retcode);
1266
1267 case RAIDFRAME_REMOVE_HOT_SPARE:
1268 return(retcode);
1269
1270 case RAIDFRAME_DELETE_COMPONENT:
1271 componentPtr = (RF_SingleComponent_t *)data;
1272 memcpy( &component, componentPtr,
1273 sizeof(RF_SingleComponent_t));
1274 retcode = rf_delete_component(raidPtr, &component);
1275 return(retcode);
1276
1277 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1278 componentPtr = (RF_SingleComponent_t *)data;
1279 memcpy( &component, componentPtr,
1280 sizeof(RF_SingleComponent_t));
1281 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1282 return(retcode);
1283
1284 case RAIDFRAME_REBUILD_IN_PLACE:
1285
1286 if (raidPtr->Layout.map->faultsTolerated == 0) {
1287 /* Can't do this on a RAID 0!! */
1288 return(EINVAL);
1289 }
1290
1291 if (raidPtr->recon_in_progress == 1) {
1292 /* a reconstruct is already in progress! */
1293 return(EINVAL);
1294 }
1295
1296 componentPtr = (RF_SingleComponent_t *) data;
1297 memcpy( &component, componentPtr,
1298 sizeof(RF_SingleComponent_t));
1299 component.row = 0; /* we don't support any more */
1300 column = component.column;
1301
1302 if ((column < 0) || (column >= raidPtr->numCol)) {
1303 return(EINVAL);
1304 }
1305
1306 RF_LOCK_MUTEX(raidPtr->mutex);
1307 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1308 (raidPtr->numFailures > 0)) {
1309 /* XXX 0 above shouldn't be constant!!! */
1310 /* some component other than this has failed.
1311 Let's not make things worse than they already
1312 are... */
1313 printf("raid%d: Unable to reconstruct to disk at:\n",
1314 raidPtr->raidid);
1315 printf("raid%d: Col: %d Too many failures.\n",
1316 raidPtr->raidid, column);
1317 RF_UNLOCK_MUTEX(raidPtr->mutex);
1318 return (EINVAL);
1319 }
1320 if (raidPtr->Disks[column].status ==
1321 rf_ds_reconstructing) {
1322 printf("raid%d: Unable to reconstruct to disk at:\n",
1323 raidPtr->raidid);
1324 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1325
1326 RF_UNLOCK_MUTEX(raidPtr->mutex);
1327 return (EINVAL);
1328 }
1329 if (raidPtr->Disks[column].status == rf_ds_spared) {
1330 RF_UNLOCK_MUTEX(raidPtr->mutex);
1331 return (EINVAL);
1332 }
1333 RF_UNLOCK_MUTEX(raidPtr->mutex);
1334
1335 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1336 if (rrcopy == NULL)
1337 return(ENOMEM);
1338
1339 rrcopy->raidPtr = (void *) raidPtr;
1340 rrcopy->col = column;
1341
1342 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1343 rf_ReconstructInPlaceThread,
1344 rrcopy,"raid_reconip");
1345 return(retcode);
1346
1347 case RAIDFRAME_GET_INFO:
1348 if (!raidPtr->valid)
1349 return (ENODEV);
1350 ucfgp = (RF_DeviceConfig_t **) data;
1351 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1352 (RF_DeviceConfig_t *));
1353 if (d_cfg == NULL)
1354 return (ENOMEM);
1355 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1356 d_cfg->rows = 1; /* there is only 1 row now */
1357 d_cfg->cols = raidPtr->numCol;
1358 d_cfg->ndevs = raidPtr->numCol;
1359 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1360 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1361 return (ENOMEM);
1362 }
1363 d_cfg->nspares = raidPtr->numSpare;
1364 if (d_cfg->nspares >= RF_MAX_DISKS) {
1365 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1366 return (ENOMEM);
1367 }
1368 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1369 d = 0;
1370 for (j = 0; j < d_cfg->cols; j++) {
1371 d_cfg->devs[d] = raidPtr->Disks[j];
1372 d++;
1373 }
1374 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1375 d_cfg->spares[i] = raidPtr->Disks[j];
1376 }
1377 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1378 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1379
1380 return (retcode);
1381
1382 case RAIDFRAME_CHECK_PARITY:
1383 *(int *) data = raidPtr->parity_good;
1384 return (0);
1385
1386 case RAIDFRAME_RESET_ACCTOTALS:
1387 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1388 return (0);
1389
1390 case RAIDFRAME_GET_ACCTOTALS:
1391 totals = (RF_AccTotals_t *) data;
1392 *totals = raidPtr->acc_totals;
1393 return (0);
1394
1395 case RAIDFRAME_KEEP_ACCTOTALS:
1396 raidPtr->keep_acc_totals = *(int *)data;
1397 return (0);
1398
1399 case RAIDFRAME_GET_SIZE:
1400 *(int *) data = raidPtr->totalSectors;
1401 return (0);
1402
1403 /* fail a disk & optionally start reconstruction */
1404 case RAIDFRAME_FAIL_DISK:
1405
1406 if (raidPtr->Layout.map->faultsTolerated == 0) {
1407 /* Can't do this on a RAID 0!! */
1408 return(EINVAL);
1409 }
1410
1411 rr = (struct rf_recon_req *) data;
1412 rr->row = 0;
1413 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1414 return (EINVAL);
1415
1416
1417 RF_LOCK_MUTEX(raidPtr->mutex);
1418 if (raidPtr->status == rf_rs_reconstructing) {
1419 /* you can't fail a disk while we're reconstructing! */
1420 /* XXX wrong for RAID6 */
1421 RF_UNLOCK_MUTEX(raidPtr->mutex);
1422 return (EINVAL);
1423 }
1424 if ((raidPtr->Disks[rr->col].status ==
1425 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1426 /* some other component has failed. Let's not make
1427 things worse. XXX wrong for RAID6 */
1428 RF_UNLOCK_MUTEX(raidPtr->mutex);
1429 return (EINVAL);
1430 }
1431 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1432 /* Can't fail a spared disk! */
1433 RF_UNLOCK_MUTEX(raidPtr->mutex);
1434 return (EINVAL);
1435 }
1436 RF_UNLOCK_MUTEX(raidPtr->mutex);
1437
1438 /* make a copy of the recon request so that we don't rely on
1439 * the user's buffer */
1440 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1441 if (rrcopy == NULL)
1442 return(ENOMEM);
1443 memcpy(rrcopy, rr, sizeof(*rr));
1444 rrcopy->raidPtr = (void *) raidPtr;
1445
1446 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1447 rf_ReconThread,
1448 rrcopy,"raid_recon");
1449 return (0);
1450
1451 /* invoke a copyback operation after recon on whatever disk
1452 * needs it, if any */
1453 case RAIDFRAME_COPYBACK:
1454
1455 if (raidPtr->Layout.map->faultsTolerated == 0) {
1456 /* This makes no sense on a RAID 0!! */
1457 return(EINVAL);
1458 }
1459
1460 if (raidPtr->copyback_in_progress == 1) {
1461 /* Copyback is already in progress! */
1462 return(EINVAL);
1463 }
1464
1465 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1466 rf_CopybackThread,
1467 raidPtr,"raid_copyback");
1468 return (retcode);
1469
1470 /* return the percentage completion of reconstruction */
1471 case RAIDFRAME_CHECK_RECON_STATUS:
1472 if (raidPtr->Layout.map->faultsTolerated == 0) {
1473 /* This makes no sense on a RAID 0, so tell the
1474 user it's done. */
1475 *(int *) data = 100;
1476 return(0);
1477 }
1478 if (raidPtr->status != rf_rs_reconstructing)
1479 *(int *) data = 100;
1480 else {
1481 if (raidPtr->reconControl->numRUsTotal > 0) {
1482 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1483 } else {
1484 *(int *) data = 0;
1485 }
1486 }
1487 return (0);
1488 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1489 progressInfoPtr = (RF_ProgressInfo_t **) data;
1490 if (raidPtr->status != rf_rs_reconstructing) {
1491 progressInfo.remaining = 0;
1492 progressInfo.completed = 100;
1493 progressInfo.total = 100;
1494 } else {
1495 progressInfo.total =
1496 raidPtr->reconControl->numRUsTotal;
1497 progressInfo.completed =
1498 raidPtr->reconControl->numRUsComplete;
1499 progressInfo.remaining = progressInfo.total -
1500 progressInfo.completed;
1501 }
1502 retcode = copyout(&progressInfo, *progressInfoPtr,
1503 sizeof(RF_ProgressInfo_t));
1504 return (retcode);
1505
1506 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1507 if (raidPtr->Layout.map->faultsTolerated == 0) {
1508 /* This makes no sense on a RAID 0, so tell the
1509 user it's done. */
1510 *(int *) data = 100;
1511 return(0);
1512 }
1513 if (raidPtr->parity_rewrite_in_progress == 1) {
1514 *(int *) data = 100 *
1515 raidPtr->parity_rewrite_stripes_done /
1516 raidPtr->Layout.numStripe;
1517 } else {
1518 *(int *) data = 100;
1519 }
1520 return (0);
1521
1522 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1523 progressInfoPtr = (RF_ProgressInfo_t **) data;
1524 if (raidPtr->parity_rewrite_in_progress == 1) {
1525 progressInfo.total = raidPtr->Layout.numStripe;
1526 progressInfo.completed =
1527 raidPtr->parity_rewrite_stripes_done;
1528 progressInfo.remaining = progressInfo.total -
1529 progressInfo.completed;
1530 } else {
1531 progressInfo.remaining = 0;
1532 progressInfo.completed = 100;
1533 progressInfo.total = 100;
1534 }
1535 retcode = copyout(&progressInfo, *progressInfoPtr,
1536 sizeof(RF_ProgressInfo_t));
1537 return (retcode);
1538
1539 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1540 if (raidPtr->Layout.map->faultsTolerated == 0) {
1541 /* This makes no sense on a RAID 0 */
1542 *(int *) data = 100;
1543 return(0);
1544 }
1545 if (raidPtr->copyback_in_progress == 1) {
1546 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1547 raidPtr->Layout.numStripe;
1548 } else {
1549 *(int *) data = 100;
1550 }
1551 return (0);
1552
1553 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1554 progressInfoPtr = (RF_ProgressInfo_t **) data;
1555 if (raidPtr->copyback_in_progress == 1) {
1556 progressInfo.total = raidPtr->Layout.numStripe;
1557 progressInfo.completed =
1558 raidPtr->copyback_stripes_done;
1559 progressInfo.remaining = progressInfo.total -
1560 progressInfo.completed;
1561 } else {
1562 progressInfo.remaining = 0;
1563 progressInfo.completed = 100;
1564 progressInfo.total = 100;
1565 }
1566 retcode = copyout(&progressInfo, *progressInfoPtr,
1567 sizeof(RF_ProgressInfo_t));
1568 return (retcode);
1569
1570 /* the sparetable daemon calls this to wait for the kernel to
1571 * need a spare table. this ioctl does not return until a
1572 * spare table is needed. XXX -- calling mpsleep here in the
1573 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1574 * -- I should either compute the spare table in the kernel,
1575 * or have a different -- XXX XXX -- interface (a different
1576 * character device) for delivering the table -- XXX */
1577 #if 0
1578 case RAIDFRAME_SPARET_WAIT:
1579 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1580 while (!rf_sparet_wait_queue)
1581 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1582 waitreq = rf_sparet_wait_queue;
1583 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1584 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1585
1586 /* structure assignment */
1587 *((RF_SparetWait_t *) data) = *waitreq;
1588
1589 RF_Free(waitreq, sizeof(*waitreq));
1590 return (0);
1591
		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
1594 case RAIDFRAME_ABORT_SPARET_WAIT:
1595 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1596 waitreq->fcol = -1;
1597 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1598 waitreq->next = rf_sparet_wait_queue;
1599 rf_sparet_wait_queue = waitreq;
1600 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1601 wakeup(&rf_sparet_wait_queue);
1602 return (0);
1603
1604 /* used by the spare table daemon to deliver a spare table
1605 * into the kernel */
1606 case RAIDFRAME_SEND_SPARET:
1607
1608 /* install the spare table */
1609 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1610
1611 /* respond to the requestor. the return status of the spare
1612 * table installation is passed in the "fcol" field */
1613 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1614 waitreq->fcol = retcode;
1615 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1616 waitreq->next = rf_sparet_resp_queue;
1617 rf_sparet_resp_queue = waitreq;
1618 wakeup(&rf_sparet_resp_queue);
1619 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1620
1621 return (retcode);
1622 #endif
1623
1624 default:
1625 break; /* fall through to the os-specific code below */
1626
1627 }
1628
1629 if (!raidPtr->valid)
1630 return (EINVAL);
1631
1632 /*
1633 * Add support for "regular" device ioctls here.
1634 */
1635
1636 switch (cmd) {
1637 case DIOCGDINFO:
1638 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1639 break;
1640 #ifdef __HAVE_OLD_DISKLABEL
1641 case ODIOCGDINFO:
1642 newlabel = *(rs->sc_dkdev.dk_label);
1643 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1644 return ENOTTY;
1645 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1646 break;
1647 #endif
1648
1649 case DIOCGPART:
1650 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1651 ((struct partinfo *) data)->part =
1652 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1653 break;
1654
1655 case DIOCWDINFO:
1656 case DIOCSDINFO:
1657 #ifdef __HAVE_OLD_DISKLABEL
1658 case ODIOCWDINFO:
1659 case ODIOCSDINFO:
1660 #endif
1661 {
1662 struct disklabel *lp;
1663 #ifdef __HAVE_OLD_DISKLABEL
1664 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1665 memset(&newlabel, 0, sizeof newlabel);
1666 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1667 lp = &newlabel;
1668 } else
1669 #endif
1670 lp = (struct disklabel *)data;
1671
1672 if ((error = raidlock(rs)) != 0)
1673 return (error);
1674
1675 rs->sc_flags |= RAIDF_LABELLING;
1676
1677 error = setdisklabel(rs->sc_dkdev.dk_label,
1678 lp, 0, rs->sc_dkdev.dk_cpulabel);
1679 if (error == 0) {
1680 if (cmd == DIOCWDINFO
1681 #ifdef __HAVE_OLD_DISKLABEL
1682 || cmd == ODIOCWDINFO
1683 #endif
1684 )
1685 error = writedisklabel(RAIDLABELDEV(dev),
1686 raidstrategy, rs->sc_dkdev.dk_label,
1687 rs->sc_dkdev.dk_cpulabel);
1688 }
1689 rs->sc_flags &= ~RAIDF_LABELLING;
1690
1691 raidunlock(rs);
1692
1693 if (error)
1694 return (error);
1695 break;
1696 }
1697
1698 case DIOCWLABEL:
1699 if (*(int *) data != 0)
1700 rs->sc_flags |= RAIDF_WLABEL;
1701 else
1702 rs->sc_flags &= ~RAIDF_WLABEL;
1703 break;
1704
1705 case DIOCGDEFLABEL:
1706 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1707 break;
1708
1709 #ifdef __HAVE_OLD_DISKLABEL
1710 case ODIOCGDEFLABEL:
1711 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1712 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1713 return ENOTTY;
1714 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1715 break;
1716 #endif
1717
1718 default:
1719 retcode = ENOTTY;
1720 }
1721 return (retcode);
1722
1723 }
1724
1725
1726 /* raidinit -- complete the rest of the initialization for the
1727 RAIDframe device. */
1728
1729
1730 static void
1731 raidinit(RF_Raid_t *raidPtr)
1732 {
1733 struct raid_softc *rs;
1734 int unit;
1735
1736 unit = raidPtr->raidid;
1737
1738 rs = &raid_softc[unit];
1739
1740 /* XXX should check return code first... */
1741 rs->sc_flags |= RAIDF_INITED;
1742
1743 /* XXX doesn't check bounds. */
1744 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1745
1746 rs->sc_dkdev.dk_name = rs->sc_xname;
1747
1748 /* disk_attach actually creates space for the CPU disklabel, among
1749 * other things, so it's critical to call this *BEFORE* we try putzing
1750 * with disklabels. */
1751
1752 disk_attach(&rs->sc_dkdev);
1753
1754 /* XXX There may be a weird interaction here between this, and
1755 * protectedSectors, as used in RAIDframe. */
1756
1757 rs->sc_size = raidPtr->totalSectors;
1758 }
1759 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1760 /* wake up the daemon & tell it to get us a spare table
1761 * XXX
1762 * the entries in the queues should be tagged with the raidPtr
1763 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1765 * XXX
1766 *
1767 * XXX This code is not currently used. GO
1768 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Post our request on the wait queue and wake the daemon that
	 * is blocked in RAIDFRAME_SPARET_WAIT. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above is stale -- the code now uses
	 * tsleep(), which does NOT release rf_sparet_wait_mutex.  This is
	 * only safe if RF_LOCK_MUTEX is effectively a no-op here; confirm. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response off the response queue.  Note that
	 * 'req' is reused to point at the response entry from here on. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes its status back in the fcol field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1793 #endif
1794
1795 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1796 * bp & passes it down.
1797 * any calls originating in the kernel must use non-blocking I/O
1798 * do some extra sanity checking to return "appropriate" error values for
1799 * certain conditions (to make some standard utilities work)
1800 *
1801 * Formerly known as: rf_DoAccessKernel
1802 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex around the label update; it may do I/O. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held at the top of each
	 * iteration and released before dispatching I/O. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			/* Translate partition-relative block to an absolute
			 * block on the RAID device. */
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* Compute request size in sectors; pb is 1 if the byte count
		 * is not sector-aligned (a partial block). */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests that run past the end of the array; the
		 * extra (sum < ...) comparisons catch arithmetic wraparound. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject requests that are not a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this in-flight request. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* Hand the request to RAIDframe; completion is reported
		 * asynchronously via the bp passed as the callback arg. */
		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* Immediate failure from rf_DoAccess: fail the buf
			 * here since no completion callback will fire. */
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1926
1927
1928
1929
1930 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1931
1932 int
1933 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1934 {
1935 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1936 struct buf *bp;
1937 struct raidbuf *raidbp = NULL;
1938
1939 req->queue = queue;
1940
1941 #if DIAGNOSTIC
1942 if (queue->raidPtr->raidid >= numraid) {
1943 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1944 numraid);
1945 panic("Invalid Unit number in rf_DispatchKernelIO");
1946 }
1947 #endif
1948
1949 bp = req->bp;
1950 #if 1
1951 /* XXX when there is a physical disk failure, someone is passing us a
1952 * buffer that contains old stuff!! Attempt to deal with this problem
1953 * without taking a performance hit... (not sure where the real bug
1954 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1955
1956 if (bp->b_flags & B_ERROR) {
1957 bp->b_flags &= ~B_ERROR;
1958 }
1959 if (bp->b_error != 0) {
1960 bp->b_error = 0;
1961 }
1962 #endif
1963 raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT);
1964 if (raidbp == NULL) {
1965 bp->b_flags |= B_ERROR;
1966 bp->b_error = ENOMEM;
1967 return (ENOMEM);
1968 }
1969 BUF_INIT(&raidbp->rf_buf);
1970
1971 /*
1972 * context for raidiodone
1973 */
1974 raidbp->rf_obp = bp;
1975 raidbp->req = req;
1976
1977 BIO_COPYPRIO(&raidbp->rf_buf, bp);
1978
1979 switch (req->type) {
1980 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1981 /* XXX need to do something extra here.. */
1982 /* I'm leaving this in, as I've never actually seen it used,
1983 * and I'd like folks to report it... GO */
1984 printf(("WAKEUP CALLED\n"));
1985 queue->numOutstanding++;
1986
1987 /* XXX need to glue the original buffer into this?? */
1988
1989 KernelWakeupFunc(&raidbp->rf_buf);
1990 break;
1991
1992 case RF_IO_TYPE_READ:
1993 case RF_IO_TYPE_WRITE:
1994 #if RF_ACC_TRACE > 0
1995 if (req->tracerec) {
1996 RF_ETIMER_START(req->tracerec->timer);
1997 }
1998 #endif
1999 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
2000 op | bp->b_flags, queue->rf_cinfo->ci_dev,
2001 req->sectorOffset, req->numSector,
2002 req->buf, KernelWakeupFunc, (void *) req,
2003 queue->raidPtr->logBytesPerSector, req->b_proc);
2004
2005 if (rf_debugKernelAccess) {
2006 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2007 (long) bp->b_blkno));
2008 }
2009 queue->numOutstanding++;
2010 queue->last_deq_sector = req->sectorOffset;
2011 /* acc wouldn't have been let in if there were any pending
2012 * reqs at any other priority */
2013 queue->curPriority = req->priority;
2014
2015 db1_printf(("Going for %c to unit %d col %d\n",
2016 req->type, queue->raidPtr->raidid,
2017 queue->col));
2018 db1_printf(("sector %d count %d (%d bytes) %d\n",
2019 (int) req->sectorOffset, (int) req->numSector,
2020 (int) (req->numSector <<
2021 queue->raidPtr->logBytesPerSector),
2022 (int) queue->raidPtr->logBytesPerSector));
2023 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
2024 raidbp->rf_buf.b_vp->v_numoutput++;
2025 }
2026 VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf);
2027
2028 break;
2029
2030 default:
2031 panic("bad req->type in rf_DispatchKernelIO");
2032 }
2033 db1_printf(("Exiting from DispatchKernelIO\n"));
2034
2035 return (0);
2036 }
2037 /* this is the callback function associated with a I/O invoked from
2038 kernel code.
2039 */
static void
KernelWakeupFunc(struct buf *vbp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the rf_buf embedded at the start of a raidbuf. */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	int s;

	/* Block disk interrupts while we manipulate queue state. */
	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate any component-level error to the original buf. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif
#if RF_ACC_TRACE > 0
	/* Charge the elapsed time for this physical I/O to the trace
	 * record, under the global tracing mutex. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Release the shadow buf; raidbp must not be touched after this. */
	pool_put(&rf_pools.cbuf, raidbp);

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
2126
2127
2128
2129 /*
2130 * initialize a buf structure for doing an I/O in the kernel.
2131 */
2132 static void
2133 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2134 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
2135 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2136 struct proc *b_proc)
2137 {
2138 /* bp->b_flags = B_PHYS | rw_flag; */
2139 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2140 bp->b_bcount = numSect << logBytesPerSector;
2141 bp->b_bufsize = bp->b_bcount;
2142 bp->b_error = 0;
2143 bp->b_dev = dev;
2144 bp->b_data = bf;
2145 bp->b_blkno = startSect;
2146 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2147 if (bp->b_bcount == 0) {
2148 panic("bp->b_bcount is zero in InitBP!!");
2149 }
2150 bp->b_proc = b_proc;
2151 bp->b_iodone = cbFunc;
2152 bp->b_vp = b_vp;
2153
2154 }
2155
2156 static void
2157 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2158 struct disklabel *lp)
2159 {
2160 memset(lp, 0, sizeof(*lp));
2161
2162 /* fabricate a label... */
2163 lp->d_secperunit = raidPtr->totalSectors;
2164 lp->d_secsize = raidPtr->bytesPerSector;
2165 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2166 lp->d_ntracks = 4 * raidPtr->numCol;
2167 lp->d_ncylinders = raidPtr->totalSectors /
2168 (lp->d_nsectors * lp->d_ntracks);
2169 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2170
2171 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2172 lp->d_type = DTYPE_RAID;
2173 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2174 lp->d_rpm = 3600;
2175 lp->d_interleave = 1;
2176 lp->d_flags = 0;
2177
2178 lp->d_partitions[RAW_PART].p_offset = 0;
2179 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2180 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2181 lp->d_npartitions = RAW_PART + 1;
2182
2183 lp->d_magic = DISKMAGIC;
2184 lp->d_magic2 = DISKMAGIC;
2185 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2186
2187 }
2188 /*
2189 * Read the disklabel from the raid device. If one is not present, fake one
2190 * up.
2191 */
/*
 * Load the in-core disklabel for the raid unit carried in `dev':
 * start from a fabricated default, then try to read a real label
 * from the raw partition; fall back to a fake label if none exists.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* pre-fill lp with a sane default in case reading fails */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no on-disk label -- synthesize one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* warn (but keep the label) if any partition runs past
		   the end of the array */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2244 /*
2245 * Take care of things one might want to take care of in the event
2246 * that a disklabel isn't present.
2247 */
/*
 * Synthesize an in-core disklabel when no label was found on disk.
 * The default label built by raidgetdefaultlabel() is already in
 * rs->sc_dkdev.dk_label; only adjust the raw partition type, the
 * pack name, and recompute the checksum.
 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	/* d_checksum must be recomputed after any field changes */
	lp->d_checksum = dkcksum(lp);
}
2265 /*
2266 * Lookup the provided name in the filesystem. If the file exists,
2267 * is a valid block device, and isn't being used by anyone else,
2268 * set *vpp to the file's vnode.
2269 * You'll find the original of this in ccd.c
2270 */
2271 int
2272 raidlookup(char *path, struct proc *p, struct vnode **vpp)
2273 {
2274 struct nameidata nd;
2275 struct vnode *vp;
2276 struct vattr va;
2277 int error;
2278
2279 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2280 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2281 return (error);
2282 }
2283 vp = nd.ni_vp;
2284 if (vp->v_usecount > 1) {
2285 VOP_UNLOCK(vp, 0);
2286 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2287 return (EBUSY);
2288 }
2289 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2290 VOP_UNLOCK(vp, 0);
2291 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2292 return (error);
2293 }
2294 /* XXX: eventually we should handle VREG, too. */
2295 if (va.va_type != VBLK) {
2296 VOP_UNLOCK(vp, 0);
2297 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2298 return (ENOTBLK);
2299 }
2300 VOP_UNLOCK(vp, 0);
2301 *vpp = vp;
2302 return (0);
2303 }
2304 /*
2305 * Wait interruptibly for an exclusive lock.
2306 *
2307 * XXX
2308 * Several drivers do this; it should be abstracted and made MP-safe.
2309 * (Hmm... where have we seen this warning before :-> GO )
2310 */
2311 static int
2312 raidlock(struct raid_softc *rs)
2313 {
2314 int error;
2315
2316 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2317 rs->sc_flags |= RAIDF_WANTED;
2318 if ((error =
2319 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2320 return (error);
2321 }
2322 rs->sc_flags |= RAIDF_LOCKED;
2323 return (0);
2324 }
2325 /*
2326 * Unlock and wake up any waiters.
2327 */
2328 static void
2329 raidunlock(struct raid_softc *rs)
2330 {
2331
2332 rs->sc_flags &= ~RAIDF_LOCKED;
2333 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2334 rs->sc_flags &= ~RAIDF_WANTED;
2335 wakeup(rs);
2336 }
2337 }
2338
2339
2340 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2341 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2342
2343 int
2344 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2345 {
2346 RF_ComponentLabel_t clabel;
2347 raidread_component_label(dev, b_vp, &clabel);
2348 clabel.mod_counter = mod_counter;
2349 clabel.clean = RF_RAID_CLEAN;
2350 raidwrite_component_label(dev, b_vp, &clabel);
2351 return(0);
2352 }
2353
2354
2355 int
2356 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2357 {
2358 RF_ComponentLabel_t clabel;
2359 raidread_component_label(dev, b_vp, &clabel);
2360 clabel.mod_counter = mod_counter;
2361 clabel.clean = RF_RAID_DIRTY;
2362 raidwrite_component_label(dev, b_vp, &clabel);
2363 return(0);
2364 }
2365
2366 /* ARGSUSED */
2367 int
2368 raidread_component_label(dev_t dev, struct vnode *b_vp,
2369 RF_ComponentLabel_t *clabel)
2370 {
2371 struct buf *bp;
2372 const struct bdevsw *bdev;
2373 int error;
2374
2375 /* XXX should probably ensure that we don't try to do this if
2376 someone has changed rf_protected_sectors. */
2377
2378 if (b_vp == NULL) {
2379 /* For whatever reason, this component is not valid.
2380 Don't try to read a component label from it. */
2381 return(EINVAL);
2382 }
2383
2384 /* get a block of the appropriate size... */
2385 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2386 bp->b_dev = dev;
2387
2388 /* get our ducks in a row for the read */
2389 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2390 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2391 bp->b_flags |= B_READ;
2392 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2393
2394 bdev = bdevsw_lookup(bp->b_dev);
2395 if (bdev == NULL)
2396 return (ENXIO);
2397 (*bdev->d_strategy)(bp);
2398
2399 error = biowait(bp);
2400
2401 if (!error) {
2402 memcpy(clabel, bp->b_data,
2403 sizeof(RF_ComponentLabel_t));
2404 }
2405
2406 brelse(bp);
2407 return(error);
2408 }
2409 /* ARGSUSED */
2410 int
2411 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2412 RF_ComponentLabel_t *clabel)
2413 {
2414 struct buf *bp;
2415 const struct bdevsw *bdev;
2416 int error;
2417
2418 /* get a block of the appropriate size... */
2419 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2420 bp->b_dev = dev;
2421
2422 /* get our ducks in a row for the write */
2423 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2424 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2425 bp->b_flags |= B_WRITE;
2426 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2427
2428 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2429
2430 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2431
2432 bdev = bdevsw_lookup(bp->b_dev);
2433 if (bdev == NULL)
2434 return (ENXIO);
2435 (*bdev->d_strategy)(bp);
2436 error = biowait(bp);
2437 brelse(bp);
2438 if (error) {
2439 #if 1
2440 printf("Failed to write RAID component info!\n");
2441 #endif
2442 }
2443
2444 return(error);
2445 }
2446
/*
 * Bump the array's modification counter and mark the component label
 * of every live component (and every in-use spare) as dirty.  Used so
 * that an unclean shutdown can be detected at the next configuration.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
					raidPtr->Disks[c].dev,
					raidPtr->raid_cinfo[c].ci_vp,
					raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			*/

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column points at this spare,
			   scol stays -1 and gets written to the label below.
			   Presumably that cannot happen for a disk in
			   rf_ds_used_spare state -- verify. */

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2517
2518
/*
 * Rewrite the component labels of all optimal components and in-use
 * spares with the current mod_counter and status.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE (i.e. at shutdown/unconfigure) and parity
 * is known good, also set the clean bit on each label.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						raidPtr->Disks[c].dev,
						raidPtr->raid_cinfo[c].ci_vp,
						raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			*/

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			/* the spare's label records the column it now
			   serves, so it autoconfigures in that slot */
			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2609
2610 void
2611 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2612 {
2613 struct proc *p;
2614
2615 p = raidPtr->engine_thread;
2616
2617 if (vp != NULL) {
2618 if (auto_configured == 1) {
2619 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2620 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2621 vput(vp);
2622
2623 } else {
2624 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2625 }
2626 }
2627 }
2628
2629
2630 void
2631 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2632 {
2633 int r,c;
2634 struct vnode *vp;
2635 int acd;
2636
2637
2638 /* We take this opportunity to close the vnodes like we should.. */
2639
2640 for (c = 0; c < raidPtr->numCol; c++) {
2641 vp = raidPtr->raid_cinfo[c].ci_vp;
2642 acd = raidPtr->Disks[c].auto_configured;
2643 rf_close_component(raidPtr, vp, acd);
2644 raidPtr->raid_cinfo[c].ci_vp = NULL;
2645 raidPtr->Disks[c].auto_configured = 0;
2646 }
2647
2648 for (r = 0; r < raidPtr->numSpare; r++) {
2649 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2650 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2651 rf_close_component(raidPtr, vp, acd);
2652 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2653 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2654 }
2655 }
2656
2657
/*
 * Kernel-thread body: fail the component named in *req and, if
 * RF_FDFLAGS_RECON is set, reconstruct its contents onto a spare.
 * Frees *req (allocated by our creator) and exits the thread.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	/* flag visible to e.g. status ioctls while we run */
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2679
/*
 * Kernel-thread body: rewrite all parity on the set.  On success the
 * in-core parity_good flag is set so the clean bit can be written at
 * shutdown.  Wakes anyone blocked on a pending unconfigure, then exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit! If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop? If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2709
2710
/*
 * Kernel-thread body: copy reconstructed data from the spare back to
 * a replaced component, then exit.  copyback_in_progress gates
 * concurrent operations while we run.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2725
2726
/*
 * Kernel-thread body: reconstruct the component in req->col in place
 * (i.e. onto the same device, e.g. after a disk was replaced).  Frees
 * *req and exits the thread.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2744
/*
 * Scan every disk-class device in the system for partitions of type
 * FS_RAID, read the RAIDframe component label from each, and return a
 * linked list of plausible components for autoconfiguration.  The
 * vnode of each accepted component is left open (ac->vp); the caller
 * must eventually release them (see rf_release_all_vps()).
 * Returns NULL on allocation failure or if nothing was found.
 */
RF_AutoConfig_t *
rf_find_raid_components()
{
	struct vnode *vp;
	struct disklabel label;
	struct device *dv;
	dev_t dev;
	int bmajor;
	int error;
	int i;
	int good_one;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac_list;
	RF_AutoConfig_t *ac;


	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = alldevs.tqh_first; dv != NULL;
	     dv = dv->dv_list.tqe_next) {

		/* we are only interested in disks... */
		if (dv->dv_class != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
			continue;
		}
		/* fdisa is the Atari/Milan floppy driver */
		if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);

		/* get a vnode for the raw partition of this disk */

		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED, 0);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				       "%s (%d)\n", dv->dv_xname, error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		/* NOTE(review): closed with FREAD|FWRITE although opened
		   with FREAD only -- looks inconsistent; verify against
		   vnode close accounting before changing. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);

		if (error)
			continue;

		/* now walk the label, probing each FS_RAID partition */
		for (i=0; i < label.d_npartitions; i++) {
			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}

			good_one = 0;

			clabel = (RF_ComponentLabel_t *)
				malloc(sizeof(RF_ComponentLabel_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (clabel == NULL) {
				/* XXX CLEANUP HERE */
				/* NOTE(review): this return leaks the open
				   vp and any components already on ac_list */
				printf("RAID auto config: out of memory!\n");
				return(NULL); /* XXX probably should panic? */
			}

			if (!raidread_component_label(dev, vp, clabel)) {
				/* Got the label.  Does it look reasonable? */
				if (rf_reasonable_label(clabel) &&
				    (clabel->partitionSize <=
				     label.d_partitions[i].p_size)) {
#if DEBUG
					printf("Component on: %s%c: %d\n",
					       dv->dv_xname, 'a'+i,
					       label.d_partitions[i].p_size);
					rf_print_component_label(clabel);
#endif
					/* if it's reasonable, add it,
					   else ignore it. */
					ac = (RF_AutoConfig_t *)
						malloc(sizeof(RF_AutoConfig_t),
						       M_RAIDFRAME,
						       M_NOWAIT);
					if (ac == NULL) {
						/* XXX should panic?? */
						return(NULL);
					}

					snprintf(ac->devname,
					    sizeof(ac->devname), "%s%c",
					    dv->dv_xname, 'a'+i);
					ac->dev = dev;
					ac->vp = vp;	/* stays open */
					ac->clabel = clabel;
					ac->next = ac_list;
					ac_list = ac;
					good_one = 1;
				}
			}
			if (!good_one) {
				/* cleanup */
				free(clabel, M_RAIDFRAME);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
				vput(vp);
			}
		}
	}
	return(ac_list);
}
2902
2903 static int
2904 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2905 {
2906
2907 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2908 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2909 ((clabel->clean == RF_RAID_CLEAN) ||
2910 (clabel->clean == RF_RAID_DIRTY)) &&
2911 clabel->row >=0 &&
2912 clabel->column >= 0 &&
2913 clabel->num_rows > 0 &&
2914 clabel->num_columns > 0 &&
2915 clabel->row < clabel->num_rows &&
2916 clabel->column < clabel->num_columns &&
2917 clabel->blockSize > 0 &&
2918 clabel->numBlocks > 0) {
2919 /* label looks reasonable enough... */
2920 return(1);
2921 }
2922 return(0);
2923 }
2924
2925
#if DEBUG
/*
 * Debug helper: dump every interesting field of a component label to
 * the console.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2953
2954 RF_ConfigSet_t *
2955 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2956 {
2957 RF_AutoConfig_t *ac;
2958 RF_ConfigSet_t *config_sets;
2959 RF_ConfigSet_t *cset;
2960 RF_AutoConfig_t *ac_next;
2961
2962
2963 config_sets = NULL;
2964
2965 /* Go through the AutoConfig list, and figure out which components
2966 belong to what sets. */
2967 ac = ac_list;
2968 while(ac!=NULL) {
2969 /* we're going to putz with ac->next, so save it here
2970 for use at the end of the loop */
2971 ac_next = ac->next;
2972
2973 if (config_sets == NULL) {
2974 /* will need at least this one... */
2975 config_sets = (RF_ConfigSet_t *)
2976 malloc(sizeof(RF_ConfigSet_t),
2977 M_RAIDFRAME, M_NOWAIT);
2978 if (config_sets == NULL) {
2979 panic("rf_create_auto_sets: No memory!");
2980 }
2981 /* this one is easy :) */
2982 config_sets->ac = ac;
2983 config_sets->next = NULL;
2984 config_sets->rootable = 0;
2985 ac->next = NULL;
2986 } else {
2987 /* which set does this component fit into? */
2988 cset = config_sets;
2989 while(cset!=NULL) {
2990 if (rf_does_it_fit(cset, ac)) {
2991 /* looks like it matches... */
2992 ac->next = cset->ac;
2993 cset->ac = ac;
2994 break;
2995 }
2996 cset = cset->next;
2997 }
2998 if (cset==NULL) {
2999 /* didn't find a match above... new set..*/
3000 cset = (RF_ConfigSet_t *)
3001 malloc(sizeof(RF_ConfigSet_t),
3002 M_RAIDFRAME, M_NOWAIT);
3003 if (cset == NULL) {
3004 panic("rf_create_auto_sets: No memory!");
3005 }
3006 cset->ac = ac;
3007 ac->next = NULL;
3008 cset->next = config_sets;
3009 cset->rootable = 0;
3010 config_sets = cset;
3011 }
3012 }
3013 ac = ac_next;
3014 }
3015
3016
3017 return(config_sets);
3018 }
3019
3020 static int
3021 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3022 {
3023 RF_ComponentLabel_t *clabel1, *clabel2;
3024
3025 /* If this one matches the *first* one in the set, that's good
3026 enough, since the other members of the set would have been
3027 through here too... */
3028 /* note that we are not checking partitionSize here..
3029
3030 Note that we are also not checking the mod_counters here.
3031 If everything else matches execpt the mod_counter, that's
3032 good enough for this test. We will deal with the mod_counters
3033 a little later in the autoconfiguration process.
3034
3035 (clabel1->mod_counter == clabel2->mod_counter) &&
3036
3037 The reason we don't check for this is that failed disks
3038 will have lower modification counts. If those disks are
3039 not added to the set they used to belong to, then they will
3040 form their own set, which may result in 2 different sets,
3041 for example, competing to be configured at raid0, and
3042 perhaps competing to be the root filesystem set. If the
3043 wrong ones get configured, or both attempt to become /,
3044 weird behaviour and or serious lossage will occur. Thus we
3045 need to bring them into the fold here, and kick them out at
3046 a later point.
3047
3048 */
3049
3050 clabel1 = cset->ac->clabel;
3051 clabel2 = ac->clabel;
3052 if ((clabel1->version == clabel2->version) &&
3053 (clabel1->serial_number == clabel2->serial_number) &&
3054 (clabel1->num_rows == clabel2->num_rows) &&
3055 (clabel1->num_columns == clabel2->num_columns) &&
3056 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3057 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3058 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3059 (clabel1->parityConfig == clabel2->parityConfig) &&
3060 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3061 (clabel1->blockSize == clabel2->blockSize) &&
3062 (clabel1->numBlocks == clabel2->numBlocks) &&
3063 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3064 (clabel1->root_partition == clabel2->root_partition) &&
3065 (clabel1->last_unit == clabel2->last_unit) &&
3066 (clabel1->config_order == clabel2->config_order)) {
3067 /* if it get's here, it almost *has* to be a match */
3068 } else {
3069 /* it's not consistent with somebody in the set..
3070 punt */
3071 return(0);
3072 }
3073 /* all was fine.. it must fit... */
3074 return(1);
3075 }
3076
/*
 * Decide whether a config set has enough live, up-to-date components
 * to be configured.  A component counts as "present" for a column
 * only if its mod_counter matches the highest mod_counter seen in the
 * set (stale components are treated as missing).  Returns 1 if the
 * set can be configured, 0 if too many components are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the "real" mod_counter is the maximum over all members */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* For RAID 1, RAIDframe pairs adjacent columns (0-1, 2-3, ...);
	   the set survives any failures as long as no *pair* loses both
	   members, so missing components are tracked pairwise instead
	   of by simple count. */
	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no failures; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3179
/*
 * Build an RF_Config_t from the component labels of an autoconfig
 * set, suitable for the normal configuration path.  Layout/geometry
 * parameters come from the first component's label; the device name
 * for each column comes from the matching component in the list.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Fill in one devname per column */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* 3. No debug variables */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3214
3215 int
3216 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3217 {
3218 RF_ComponentLabel_t clabel;
3219 struct vnode *vp;
3220 dev_t dev;
3221 int column;
3222 int sparecol;
3223
3224 raidPtr->autoconfigure = new_value;
3225
3226 for(column=0; column<raidPtr->numCol; column++) {
3227 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3228 dev = raidPtr->Disks[column].dev;
3229 vp = raidPtr->raid_cinfo[column].ci_vp;
3230 raidread_component_label(dev, vp, &clabel);
3231 clabel.autoconfigure = new_value;
3232 raidwrite_component_label(dev, vp, &clabel);
3233 }
3234 }
3235 for(column = 0; column < raidPtr->numSpare ; column++) {
3236 sparecol = raidPtr->numCol + column;
3237 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3238 dev = raidPtr->Disks[sparecol].dev;
3239 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3240 raidread_component_label(dev, vp, &clabel);
3241 clabel.autoconfigure = new_value;
3242 raidwrite_component_label(dev, vp, &clabel);
3243 }
3244 }
3245 return(new_value);
3246 }
3247
3248 int
3249 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3250 {
3251 RF_ComponentLabel_t clabel;
3252 struct vnode *vp;
3253 dev_t dev;
3254 int column;
3255 int sparecol;
3256
3257 raidPtr->root_partition = new_value;
3258 for(column=0; column<raidPtr->numCol; column++) {
3259 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3260 dev = raidPtr->Disks[column].dev;
3261 vp = raidPtr->raid_cinfo[column].ci_vp;
3262 raidread_component_label(dev, vp, &clabel);
3263 clabel.root_partition = new_value;
3264 raidwrite_component_label(dev, vp, &clabel);
3265 }
3266 }
3267 for(column = 0; column < raidPtr->numSpare ; column++) {
3268 sparecol = raidPtr->numCol + column;
3269 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3270 dev = raidPtr->Disks[sparecol].dev;
3271 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3272 raidread_component_label(dev, vp, &clabel);
3273 clabel.root_partition = new_value;
3274 raidwrite_component_label(dev, vp, &clabel);
3275 }
3276 }
3277 return(new_value);
3278 }
3279
3280 void
3281 rf_release_all_vps(RF_ConfigSet_t *cset)
3282 {
3283 RF_AutoConfig_t *ac;
3284
3285 ac = cset->ac;
3286 while(ac!=NULL) {
3287 /* Close the vp, and give it back */
3288 if (ac->vp) {
3289 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3290 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3291 vput(ac->vp);
3292 ac->vp = NULL;
3293 }
3294 ac = ac->next;
3295 }
3296 }
3297
3298
3299 void
3300 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3301 {
3302 RF_AutoConfig_t *ac;
3303 RF_AutoConfig_t *next_ac;
3304
3305 ac = cset->ac;
3306 while(ac!=NULL) {
3307 next_ac = ac->next;
3308 /* nuke the label */
3309 free(ac->clabel, M_RAIDFRAME);
3310 /* cleanup the config structure */
3311 free(ac, M_RAIDFRAME);
3312 /* "next.." */
3313 ac = next_ac;
3314 }
3315 /* and, finally, nuke the config set */
3316 free(cset, M_RAIDFRAME);
3317 }
3318
3319
3320 void
3321 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3322 {
3323 /* current version number */
3324 clabel->version = RF_COMPONENT_LABEL_VERSION;
3325 clabel->serial_number = raidPtr->serial_number;
3326 clabel->mod_counter = raidPtr->mod_counter;
3327 clabel->num_rows = 1;
3328 clabel->num_columns = raidPtr->numCol;
3329 clabel->clean = RF_RAID_DIRTY; /* not clean */
3330 clabel->status = rf_ds_optimal; /* "It's good!" */
3331
3332 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3333 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3334 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3335
3336 clabel->blockSize = raidPtr->bytesPerSector;
3337 clabel->numBlocks = raidPtr->sectorsPerDisk;
3338
3339 /* XXX not portable */
3340 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3341 clabel->maxOutstanding = raidPtr->maxOutstanding;
3342 clabel->autoconfigure = raidPtr->autoconfigure;
3343 clabel->root_partition = raidPtr->root_partition;
3344 clabel->last_unit = raidPtr->raidid;
3345 clabel->config_order = raidPtr->config_order;
3346 }
3347
3348 int
3349 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3350 {
3351 RF_Raid_t *raidPtr;
3352 RF_Config_t *config;
3353 int raidID;
3354 int retcode;
3355
3356 #if DEBUG
3357 printf("RAID autoconfigure\n");
3358 #endif
3359
3360 retcode = 0;
3361 *unit = -1;
3362
3363 /* 1. Create a config structure */
3364
3365 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3366 M_RAIDFRAME,
3367 M_NOWAIT);
3368 if (config==NULL) {
3369 printf("Out of mem!?!?\n");
3370 /* XXX do something more intelligent here. */
3371 return(1);
3372 }
3373
3374 memset(config, 0, sizeof(RF_Config_t));
3375
3376 /*
3377 2. Figure out what RAID ID this one is supposed to live at
3378 See if we can get the same RAID dev that it was configured
3379 on last time..
3380 */
3381
3382 raidID = cset->ac->clabel->last_unit;
3383 if ((raidID < 0) || (raidID >= numraid)) {
3384 /* let's not wander off into lala land. */
3385 raidID = numraid - 1;
3386 }
3387 if (raidPtrs[raidID]->valid != 0) {
3388
3389 /*
3390 Nope... Go looking for an alternative...
3391 Start high so we don't immediately use raid0 if that's
3392 not taken.
3393 */
3394
3395 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3396 if (raidPtrs[raidID]->valid == 0) {
3397 /* can use this one! */
3398 break;
3399 }
3400 }
3401 }
3402
3403 if (raidID < 0) {
3404 /* punt... */
3405 printf("Unable to auto configure this set!\n");
3406 printf("(Out of RAID devs!)\n");
3407 return(1);
3408 }
3409
3410 #if DEBUG
3411 printf("Configuring raid%d:\n",raidID);
3412 #endif
3413
3414 raidPtr = raidPtrs[raidID];
3415
3416 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3417 raidPtr->raidid = raidID;
3418 raidPtr->openings = RAIDOUTSTANDING;
3419
3420 /* 3. Build the configuration structure */
3421 rf_create_configuration(cset->ac, config, raidPtr);
3422
3423 /* 4. Do the configuration */
3424 retcode = rf_Configure(raidPtr, config, cset->ac);
3425
3426 if (retcode == 0) {
3427
3428 raidinit(raidPtrs[raidID]);
3429
3430 rf_markalldirty(raidPtrs[raidID]);
3431 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3432 if (cset->ac->clabel->root_partition==1) {
3433 /* everything configured just fine. Make a note
3434 that this set is eligible to be root. */
3435 cset->rootable = 1;
3436 /* XXX do this here? */
3437 raidPtrs[raidID]->root_partition = 1;
3438 }
3439 }
3440
3441 /* 5. Cleanup */
3442 free(config, M_RAIDFRAME);
3443
3444 *unit = raidID;
3445 return(retcode);
3446 }
3447
3448 void
3449 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3450 {
3451 struct buf *bp;
3452
3453 bp = (struct buf *)desc->bp;
3454 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3455 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3456 }
3457
/*
 * Initialize a pool of fixed-size items: create the pool, cap it at
 * xmax items, preallocate xmin items, and keep at least xmin on hand.
 * The hiwat/prime/lowat call order is deliberate: the high watermark
 * is set before priming, and the low watermark after, so priming
 * itself is not constrained.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3467
3468 /*
3469 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3470 * if there is IO pending and if that IO could possibly be done for a
3471 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3472 * otherwise.
3473 *
3474 */
3475
3476 int
3477 rf_buf_queue_check(int raidid)
3478 {
3479 if ((BUFQ_PEEK(&(raid_softc[raidid].buf_queue)) != NULL) &&
3480 raidPtrs[raidid]->openings > 0) {
3481 /* there is work to do */
3482 return 0;
3483 }
3484 /* default is nothing to do */
3485 return 1;
3486 }
3487