/* $NetBSD: rf_netbsdkintf.c,v 1.161.2.5 2004/10/19 15:57:27 skrll Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.161.2.5 2004/10/19 15:57:27 skrll Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/user.h>
169 #include <sys/reboot.h>
170
171 #include <dev/raidframe/raidframevar.h>
172 #include <dev/raidframe/raidframeio.h>
173 #include "raid.h"
174 #include "opt_raid_autoconfig.h"
175 #include "rf_raid.h"
176 #include "rf_copyback.h"
177 #include "rf_dag.h"
178 #include "rf_dagflags.h"
179 #include "rf_desc.h"
180 #include "rf_diskqueue.h"
181 #include "rf_etimer.h"
182 #include "rf_general.h"
183 #include "rf_kintf.h"
184 #include "rf_options.h"
185 #include "rf_driver.h"
186 #include "rf_parityscan.h"
187 #include "rf_threadstuff.h"
188
189 #ifdef DEBUG
190 int rf_kdebug_level = 0;
191 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
192 #else /* DEBUG */
193 #define db1_printf(a) { }
194 #endif /* DEBUG */
195
196 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
197
198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
199
200 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
201 * spare table */
202 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
203 * installation process */
204
205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
206
207 /* prototypes */
208 static void KernelWakeupFunc(struct buf * bp);
209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
210 dev_t dev, RF_SectorNum_t startSect,
211 RF_SectorCount_t numSect, caddr_t buf,
212 void (*cbFunc) (struct buf *), void *cbArg,
213 int logBytesPerSector, struct proc * proc);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217
218 dev_type_open(raidopen);
219 dev_type_close(raidclose);
220 dev_type_read(raidread);
221 dev_type_write(raidwrite);
222 dev_type_ioctl(raidioctl);
223 dev_type_strategy(raidstrategy);
224 dev_type_dump(raiddump);
225 dev_type_size(raidsize);
226
/* Block-device entry points for /dev/raid* (disk-class device). */
227 const struct bdevsw raid_bdevsw = {
228 raidopen, raidclose, raidstrategy, raidioctl,
229 raiddump, raidsize, D_DISK
230 };
231 
/* Character-device entry points; raw I/O goes through physio(). */
232 const struct cdevsw raid_cdevsw = {
233 raidopen, raidclose, raidread, raidwrite, raidioctl,
234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
235 };
236
237 /*
238 * Pilfered from ccd.c
239 */
240
/*
 * Per-component I/O shim (pattern pilfered from ccd.c): the buf handed
 * to a component disk, plus links back to the originating request.
 * rf_buf must stay the first member so a struct buf * from the
 * component driver can be cast back to a struct raidbuf *.
 */
241 struct raidbuf {
242 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
243 struct buf *rf_obp; /* ptr. to original I/O buf */
244 RF_DiskQueueData_t *req;/* the request that this was part of.. */
245 };
246
247 /* XXX Not sure if the following should be replacing the raidPtrs above,
248 or if it should be used in conjunction with that...
249 */
250
/*
 * Per-unit software state for one RAID pseudo-disk.  One entry per unit
 * lives in the global raid_softc[] array allocated by raidattach().
 */
251 struct raid_softc {
252 int sc_flags; /* flags */
253 int sc_cflags; /* configuration flags */
254 size_t sc_size; /* size of the raid device */
255 char sc_xname[20]; /* XXX external name */
256 struct disk sc_dkdev; /* generic disk device info */
257 struct bufq_state buf_queue; /* used for the device queue */
258 };
/* sc_flags bits: */
259 /* sc_flags */
260 #define RAIDF_INITED 0x01 /* unit has been initialized */
261 #define RAIDF_WLABEL 0x02 /* label area is writable */
262 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
263 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
264 #define RAIDF_LOCKED 0x80 /* unit is locked */
265
266 #define raidunit(x) DISKUNIT(x)
267 int numraid = 0;
268
269 /*
270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271 * Be aware that large numbers can allow the driver to consume a lot of
272 * kernel memory, especially on writes, and in degraded mode reads.
273 *
274 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275 * a single 64K write will typically require 64K for the old data,
276 * 64K for the old parity, and 64K for the new parity, for a total
277 * of 192K (if the parity buffer is not re-used immediately).
278 * Even it if is used immediately, that's still 128K, which when multiplied
279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
280 *
281 * Now in degraded mode, for example, a 64K read on the above setup may
282 * require data reconstruction, which will require *all* of the 4 remaining
283 * disks to participate -- 4 * 32K/disk == 128K again.
284 */
285
286 #ifndef RAIDOUTSTANDING
287 #define RAIDOUTSTANDING 6
288 #endif
289
290 #define RAIDLABELDEV(dev) \
291 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
292
293 /* declared here, and made public, for the benefit of KVM stuff.. */
294 struct raid_softc *raid_softc;
295
296 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
297 struct disklabel *);
298 static void raidgetdisklabel(dev_t);
299 static void raidmakedisklabel(struct raid_softc *);
300
301 static int raidlock(struct raid_softc *);
302 static void raidunlock(struct raid_softc *);
303
304 static void rf_markalldirty(RF_Raid_t *);
305
306 struct device *raidrootdev;
307
308 void rf_ReconThread(struct rf_recon_req *);
309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
310 void rf_CopybackThread(RF_Raid_t *raidPtr);
311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
312 int rf_autoconfig(struct device *self);
313 void rf_buildroothack(RF_ConfigSet_t *);
314
315 RF_AutoConfig_t *rf_find_raid_components(void);
316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
318 static int rf_reasonable_label(RF_ComponentLabel_t *);
319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
320 int rf_set_autoconfig(RF_Raid_t *, int);
321 int rf_set_rootpartition(RF_Raid_t *, int);
322 void rf_release_all_vps(RF_ConfigSet_t *);
323 void rf_cleanup_config_set(RF_ConfigSet_t *);
324 int rf_have_enough_components(RF_ConfigSet_t *);
325 int rf_auto_config_set(RF_ConfigSet_t *, int *);
326
327 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
328 allow autoconfig to take place.
329 Note that this is overridden by having
330 RAID_AUTOCONFIG as an option in the
331 kernel config file. */
332
333 struct RF_Pools_s rf_pools;
334
335 void
336 raidattach(int num)
337 {
338 int raidID;
339 int i, rc;
340
341 #ifdef DEBUG
342 printf("raidattach: Asked for %d units\n", num);
343 #endif
344
345 if (num <= 0) {
346 #ifdef DIAGNOSTIC
347 panic("raidattach: count <= 0");
348 #endif
349 return;
350 }
351 /* This is where all the initialization stuff gets done. */
352
353 numraid = num;
354
355 /* Make some space for requested number of units... */
356
357 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
358 if (raidPtrs == NULL) {
359 panic("raidPtrs is NULL!!");
360 }
361
362 /* Initialize the component buffer pool. */
363 rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf),
364 "raidpl", num * RAIDOUTSTANDING,
365 2 * num * RAIDOUTSTANDING);
366
367 rf_mutex_init(&rf_sparet_wait_mutex);
368
369 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
370
371 for (i = 0; i < num; i++)
372 raidPtrs[i] = NULL;
373 rc = rf_BootRaidframe();
374 if (rc == 0)
375 printf("Kernelized RAIDframe activated\n");
376 else
377 panic("Serious error booting RAID!!");
378
379 /* put together some datastructures like the CCD device does.. This
380 * lets us lock the device and what-not when it gets opened. */
381
382 raid_softc = (struct raid_softc *)
383 malloc(num * sizeof(struct raid_softc),
384 M_RAIDFRAME, M_NOWAIT);
385 if (raid_softc == NULL) {
386 printf("WARNING: no memory for RAIDframe driver\n");
387 return;
388 }
389
390 memset(raid_softc, 0, num * sizeof(struct raid_softc));
391
392 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
393 M_RAIDFRAME, M_NOWAIT);
394 if (raidrootdev == NULL) {
395 panic("No memory for RAIDframe driver!!?!?!");
396 }
397
398 for (raidID = 0; raidID < num; raidID++) {
399 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
400
401 raidrootdev[raidID].dv_class = DV_DISK;
402 raidrootdev[raidID].dv_cfdata = NULL;
403 raidrootdev[raidID].dv_unit = raidID;
404 raidrootdev[raidID].dv_parent = NULL;
405 raidrootdev[raidID].dv_flags = 0;
406 snprintf(raidrootdev[raidID].dv_xname,
407 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);
408
409 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
410 (RF_Raid_t *));
411 if (raidPtrs[raidID] == NULL) {
412 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
413 numraid = raidID;
414 return;
415 }
416 }
417
418 #ifdef RAID_AUTOCONFIG
419 raidautoconfig = 1;
420 #endif
421
422 /*
423 * Register a finalizer which will be used to auto-config RAID
424 * sets once all real hardware devices have been found.
425 */
426 if (config_finalize_register(NULL, rf_autoconfig) != 0)
427 printf("WARNING: unable to register RAIDframe finalizer\n");
428 }
429
430 int
431 rf_autoconfig(struct device *self)
432 {
433 RF_AutoConfig_t *ac_list;
434 RF_ConfigSet_t *config_sets;
435
436 if (raidautoconfig == 0)
437 return (0);
438
439 /* XXX This code can only be run once. */
440 raidautoconfig = 0;
441
442 /* 1. locate all RAID components on the system */
443 #ifdef DEBUG
444 printf("Searching for RAID components...\n");
445 #endif
446 ac_list = rf_find_raid_components();
447
448 /* 2. Sort them into their respective sets. */
449 config_sets = rf_create_auto_sets(ac_list);
450
451 /*
452 * 3. Evaluate each set andconfigure the valid ones.
453 * This gets done in rf_buildroothack().
454 */
455 rf_buildroothack(config_sets);
456
457 return (1);
458 }
459
460 void
461 rf_buildroothack(RF_ConfigSet_t *config_sets)
462 {
463 RF_ConfigSet_t *cset;
464 RF_ConfigSet_t *next_cset;
465 int retcode;
466 int raidID;
467 int rootID;
468 int num_root;
469
470 rootID = 0;
471 num_root = 0;
472 cset = config_sets;
473 while(cset != NULL ) {
474 next_cset = cset->next;
475 if (rf_have_enough_components(cset) &&
476 cset->ac->clabel->autoconfigure==1) {
477 retcode = rf_auto_config_set(cset,&raidID);
478 if (!retcode) {
479 if (cset->rootable) {
480 rootID = raidID;
481 num_root++;
482 }
483 } else {
484 /* The autoconfig didn't work :( */
485 #if DEBUG
486 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
487 #endif
488 rf_release_all_vps(cset);
489 }
490 } else {
491 /* we're not autoconfiguring this set...
492 release the associated resources */
493 rf_release_all_vps(cset);
494 }
495 /* cleanup */
496 rf_cleanup_config_set(cset);
497 cset = next_cset;
498 }
499
500 /* we found something bootable... */
501
502 if (num_root == 1) {
503 booted_device = &raidrootdev[rootID];
504 } else if (num_root > 1) {
505 /* we can't guess.. require the user to answer... */
506 boothowto |= RB_ASKNAME;
507 }
508 }
509
510
511 int
512 raidsize(dev_t dev)
513 {
514 struct raid_softc *rs;
515 struct disklabel *lp;
516 int part, unit, omask, size;
517
518 unit = raidunit(dev);
519 if (unit >= numraid)
520 return (-1);
521 rs = &raid_softc[unit];
522
523 if ((rs->sc_flags & RAIDF_INITED) == 0)
524 return (-1);
525
526 part = DISKPART(dev);
527 omask = rs->sc_dkdev.dk_openmask & (1 << part);
528 lp = rs->sc_dkdev.dk_label;
529
530 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
531 return (-1);
532
533 if (lp->d_partitions[part].p_fstype != FS_SWAP)
534 size = -1;
535 else
536 size = lp->d_partitions[part].p_size *
537 (lp->d_secsize / DEV_BSIZE);
538
539 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
540 return (-1);
541
542 return (size);
543
544 }
545
546 int
547 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
548 {
549 /* Not implemented. */
550 return ENXIO;
551 }
552 /* ARGSUSED */
553 int
554 raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
555 {
556 int unit = raidunit(dev);
557 struct raid_softc *rs;
558 struct disklabel *lp;
559 int part, pmask;
560 int error = 0;
561
562 if (unit >= numraid)
563 return (ENXIO);
564 rs = &raid_softc[unit];
565
566 if ((error = raidlock(rs)) != 0)
567 return (error);
568 lp = rs->sc_dkdev.dk_label;
569
570 part = DISKPART(dev);
571 pmask = (1 << part);
572
573 if ((rs->sc_flags & RAIDF_INITED) &&
574 (rs->sc_dkdev.dk_openmask == 0))
575 raidgetdisklabel(dev);
576
577 /* make sure that this partition exists */
578
579 if (part != RAW_PART) {
580 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
581 ((part >= lp->d_npartitions) ||
582 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
583 error = ENXIO;
584 raidunlock(rs);
585 return (error);
586 }
587 }
588 /* Prevent this unit from being unconfigured while open. */
589 switch (fmt) {
590 case S_IFCHR:
591 rs->sc_dkdev.dk_copenmask |= pmask;
592 break;
593
594 case S_IFBLK:
595 rs->sc_dkdev.dk_bopenmask |= pmask;
596 break;
597 }
598
599 if ((rs->sc_dkdev.dk_openmask == 0) &&
600 ((rs->sc_flags & RAIDF_INITED) != 0)) {
601 /* First one... mark things as dirty... Note that we *MUST*
602 have done a configure before this. I DO NOT WANT TO BE
603 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
604 THAT THEY BELONG TOGETHER!!!!! */
605 /* XXX should check to see if we're only open for reading
606 here... If so, we needn't do this, but then need some
607 other way of keeping track of what's happened.. */
608
609 rf_markalldirty( raidPtrs[unit] );
610 }
611
612
613 rs->sc_dkdev.dk_openmask =
614 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
615
616 raidunlock(rs);
617
618 return (error);
619
620
621 }
622 /* ARGSUSED */
623 int
624 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
625 {
626 int unit = raidunit(dev);
627 struct raid_softc *rs;
628 int error = 0;
629 int part;
630
631 if (unit >= numraid)
632 return (ENXIO);
633 rs = &raid_softc[unit];
634
635 if ((error = raidlock(rs)) != 0)
636 return (error);
637
638 part = DISKPART(dev);
639
640 /* ...that much closer to allowing unconfiguration... */
641 switch (fmt) {
642 case S_IFCHR:
643 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
644 break;
645
646 case S_IFBLK:
647 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
648 break;
649 }
650 rs->sc_dkdev.dk_openmask =
651 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
652
653 if ((rs->sc_dkdev.dk_openmask == 0) &&
654 ((rs->sc_flags & RAIDF_INITED) != 0)) {
655 /* Last one... device is not unconfigured yet.
656 Device shutdown has taken care of setting the
657 clean bits if RAIDF_INITED is not set
658 mark things as clean... */
659
660 rf_update_component_labels(raidPtrs[unit],
661 RF_FINAL_COMPONENT_UPDATE);
662 if (doing_shutdown) {
663 /* last one, and we're going down, so
664 lights out for this RAID set too. */
665 error = rf_Shutdown(raidPtrs[unit]);
666
667 /* It's no longer initialized... */
668 rs->sc_flags &= ~RAIDF_INITED;
669
670 /* Detach the disk. */
671 disk_detach(&rs->sc_dkdev);
672 }
673 }
674
675 raidunlock(rs);
676 return (0);
677
678 }
679
680 void
681 raidstrategy(struct buf *bp)
682 {
683 int s;
684
685 unsigned int raidID = raidunit(bp->b_dev);
686 RF_Raid_t *raidPtr;
687 struct raid_softc *rs = &raid_softc[raidID];
688 int wlabel;
689
690 if ((rs->sc_flags & RAIDF_INITED) ==0) {
691 bp->b_error = ENXIO;
692 bp->b_flags |= B_ERROR;
693 bp->b_resid = bp->b_bcount;
694 biodone(bp);
695 return;
696 }
697 if (raidID >= numraid || !raidPtrs[raidID]) {
698 bp->b_error = ENODEV;
699 bp->b_flags |= B_ERROR;
700 bp->b_resid = bp->b_bcount;
701 biodone(bp);
702 return;
703 }
704 raidPtr = raidPtrs[raidID];
705 if (!raidPtr->valid) {
706 bp->b_error = ENODEV;
707 bp->b_flags |= B_ERROR;
708 bp->b_resid = bp->b_bcount;
709 biodone(bp);
710 return;
711 }
712 if (bp->b_bcount == 0) {
713 db1_printf(("b_bcount is zero..\n"));
714 biodone(bp);
715 return;
716 }
717
718 /*
719 * Do bounds checking and adjust transfer. If there's an
720 * error, the bounds check will flag that for us.
721 */
722
723 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
724 if (DISKPART(bp->b_dev) != RAW_PART)
725 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
726 db1_printf(("Bounds check failed!!:%d %d\n",
727 (int) bp->b_blkno, (int) wlabel));
728 biodone(bp);
729 return;
730 }
731 s = splbio();
732
733 bp->b_resid = 0;
734
735 /* stuff it onto our queue */
736 BUFQ_PUT(&rs->buf_queue, bp);
737
738 raidstart(raidPtrs[raidID]);
739
740 splx(s);
741 }
742 /* ARGSUSED */
743 int
744 raidread(dev_t dev, struct uio *uio, int flags)
745 {
746 int unit = raidunit(dev);
747 struct raid_softc *rs;
748
749 if (unit >= numraid)
750 return (ENXIO);
751 rs = &raid_softc[unit];
752
753 if ((rs->sc_flags & RAIDF_INITED) == 0)
754 return (ENXIO);
755
756 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
757
758 }
759 /* ARGSUSED */
760 int
761 raidwrite(dev_t dev, struct uio *uio, int flags)
762 {
763 int unit = raidunit(dev);
764 struct raid_softc *rs;
765
766 if (unit >= numraid)
767 return (ENXIO);
768 rs = &raid_softc[unit];
769
770 if ((rs->sc_flags & RAIDF_INITED) == 0)
771 return (ENXIO);
772
773 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
774
775 }
776
777 int
778 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
779 {
780 int unit = raidunit(dev);
781 int error = 0;
782 int part, pmask;
783 struct raid_softc *rs;
784 RF_Config_t *k_cfg, *u_cfg;
785 RF_Raid_t *raidPtr;
786 RF_RaidDisk_t *diskPtr;
787 RF_AccTotals_t *totals;
788 RF_DeviceConfig_t *d_cfg, **ucfgp;
789 u_char *specific_buf;
790 int retcode = 0;
791 int column;
792 int raidid;
793 struct rf_recon_req *rrcopy, *rr;
794 RF_ComponentLabel_t *clabel;
795 RF_ComponentLabel_t ci_label;
796 RF_ComponentLabel_t **clabel_ptr;
797 RF_SingleComponent_t *sparePtr,*componentPtr;
798 RF_SingleComponent_t hot_spare;
799 RF_SingleComponent_t component;
800 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
801 int i, j, d;
802 #ifdef __HAVE_OLD_DISKLABEL
803 struct disklabel newlabel;
804 #endif
805
806 if (unit >= numraid)
807 return (ENXIO);
808 rs = &raid_softc[unit];
809 raidPtr = raidPtrs[unit];
810
811 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
812 (int) DISKPART(dev), (int) unit, (int) cmd));
813
814 /* Must be open for writes for these commands... */
815 switch (cmd) {
816 case DIOCSDINFO:
817 case DIOCWDINFO:
818 #ifdef __HAVE_OLD_DISKLABEL
819 case ODIOCWDINFO:
820 case ODIOCSDINFO:
821 #endif
822 case DIOCWLABEL:
823 if ((flag & FWRITE) == 0)
824 return (EBADF);
825 }
826
827 /* Must be initialized for these... */
828 switch (cmd) {
829 case DIOCGDINFO:
830 case DIOCSDINFO:
831 case DIOCWDINFO:
832 #ifdef __HAVE_OLD_DISKLABEL
833 case ODIOCGDINFO:
834 case ODIOCWDINFO:
835 case ODIOCSDINFO:
836 case ODIOCGDEFLABEL:
837 #endif
838 case DIOCGPART:
839 case DIOCWLABEL:
840 case DIOCGDEFLABEL:
841 case RAIDFRAME_SHUTDOWN:
842 case RAIDFRAME_REWRITEPARITY:
843 case RAIDFRAME_GET_INFO:
844 case RAIDFRAME_RESET_ACCTOTALS:
845 case RAIDFRAME_GET_ACCTOTALS:
846 case RAIDFRAME_KEEP_ACCTOTALS:
847 case RAIDFRAME_GET_SIZE:
848 case RAIDFRAME_FAIL_DISK:
849 case RAIDFRAME_COPYBACK:
850 case RAIDFRAME_CHECK_RECON_STATUS:
851 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
852 case RAIDFRAME_GET_COMPONENT_LABEL:
853 case RAIDFRAME_SET_COMPONENT_LABEL:
854 case RAIDFRAME_ADD_HOT_SPARE:
855 case RAIDFRAME_REMOVE_HOT_SPARE:
856 case RAIDFRAME_INIT_LABELS:
857 case RAIDFRAME_REBUILD_IN_PLACE:
858 case RAIDFRAME_CHECK_PARITY:
859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
861 case RAIDFRAME_CHECK_COPYBACK_STATUS:
862 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
863 case RAIDFRAME_SET_AUTOCONFIG:
864 case RAIDFRAME_SET_ROOT:
865 case RAIDFRAME_DELETE_COMPONENT:
866 case RAIDFRAME_INCORPORATE_HOT_SPARE:
867 if ((rs->sc_flags & RAIDF_INITED) == 0)
868 return (ENXIO);
869 }
870
871 switch (cmd) {
872
873 /* configure the system */
874 case RAIDFRAME_CONFIGURE:
875
876 if (raidPtr->valid) {
877 /* There is a valid RAID set running on this unit! */
878 printf("raid%d: Device already configured!\n",unit);
879 return(EINVAL);
880 }
881
882 /* copy-in the configuration information */
883 /* data points to a pointer to the configuration structure */
884
885 u_cfg = *((RF_Config_t **) data);
886 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
887 if (k_cfg == NULL) {
888 return (ENOMEM);
889 }
890 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
891 if (retcode) {
892 RF_Free(k_cfg, sizeof(RF_Config_t));
893 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
894 retcode));
895 return (retcode);
896 }
897 /* allocate a buffer for the layout-specific data, and copy it
898 * in */
899 if (k_cfg->layoutSpecificSize) {
900 if (k_cfg->layoutSpecificSize > 10000) {
901 /* sanity check */
902 RF_Free(k_cfg, sizeof(RF_Config_t));
903 return (EINVAL);
904 }
905 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
906 (u_char *));
907 if (specific_buf == NULL) {
908 RF_Free(k_cfg, sizeof(RF_Config_t));
909 return (ENOMEM);
910 }
911 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
912 k_cfg->layoutSpecificSize);
913 if (retcode) {
914 RF_Free(k_cfg, sizeof(RF_Config_t));
915 RF_Free(specific_buf,
916 k_cfg->layoutSpecificSize);
917 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
918 retcode));
919 return (retcode);
920 }
921 } else
922 specific_buf = NULL;
923 k_cfg->layoutSpecific = specific_buf;
924
925 /* should do some kind of sanity check on the configuration.
926 * Store the sum of all the bytes in the last byte? */
927
928 /* configure the system */
929
930 /*
931 * Clear the entire RAID descriptor, just to make sure
932 * there is no stale data left in the case of a
933 * reconfiguration
934 */
935 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
936 raidPtr->raidid = unit;
937
938 retcode = rf_Configure(raidPtr, k_cfg, NULL);
939
940 if (retcode == 0) {
941
942 /* allow this many simultaneous IO's to
943 this RAID device */
944 raidPtr->openings = RAIDOUTSTANDING;
945
946 raidinit(raidPtr);
947 rf_markalldirty(raidPtr);
948 }
949 /* free the buffers. No return code here. */
950 if (k_cfg->layoutSpecificSize) {
951 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
952 }
953 RF_Free(k_cfg, sizeof(RF_Config_t));
954
955 return (retcode);
956
957 /* shutdown the system */
958 case RAIDFRAME_SHUTDOWN:
959
960 if ((error = raidlock(rs)) != 0)
961 return (error);
962
963 /*
964 * If somebody has a partition mounted, we shouldn't
965 * shutdown.
966 */
967
968 part = DISKPART(dev);
969 pmask = (1 << part);
970 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
971 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
972 (rs->sc_dkdev.dk_copenmask & pmask))) {
973 raidunlock(rs);
974 return (EBUSY);
975 }
976
977 retcode = rf_Shutdown(raidPtr);
978
979 /* It's no longer initialized... */
980 rs->sc_flags &= ~RAIDF_INITED;
981
982 /* Detach the disk. */
983 disk_detach(&rs->sc_dkdev);
984
985 raidunlock(rs);
986
987 return (retcode);
988 case RAIDFRAME_GET_COMPONENT_LABEL:
989 clabel_ptr = (RF_ComponentLabel_t **) data;
990 /* need to read the component label for the disk indicated
991 by row,column in clabel */
992
993 /* For practice, let's get it directly fromdisk, rather
994 than from the in-core copy */
995 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
996 (RF_ComponentLabel_t *));
997 if (clabel == NULL)
998 return (ENOMEM);
999
1000 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1001
1002 retcode = copyin( *clabel_ptr, clabel,
1003 sizeof(RF_ComponentLabel_t));
1004
1005 if (retcode) {
1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1007 return(retcode);
1008 }
1009
1010 clabel->row = 0; /* Don't allow looking at anything else.*/
1011
1012 column = clabel->column;
1013
1014 if ((column < 0) || (column >= raidPtr->numCol +
1015 raidPtr->numSpare)) {
1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1017 return(EINVAL);
1018 }
1019
1020 raidread_component_label(raidPtr->Disks[column].dev,
1021 raidPtr->raid_cinfo[column].ci_vp,
1022 clabel );
1023
1024 retcode = copyout(clabel, *clabel_ptr,
1025 sizeof(RF_ComponentLabel_t));
1026 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1027 return (retcode);
1028
1029 case RAIDFRAME_SET_COMPONENT_LABEL:
1030 clabel = (RF_ComponentLabel_t *) data;
1031
1032 /* XXX check the label for valid stuff... */
1033 /* Note that some things *should not* get modified --
1034 the user should be re-initing the labels instead of
1035 trying to patch things.
1036 */
1037
1038 raidid = raidPtr->raidid;
1039 #if DEBUG
1040 printf("raid%d: Got component label:\n", raidid);
1041 printf("raid%d: Version: %d\n", raidid, clabel->version);
1042 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1043 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1044 printf("raid%d: Column: %d\n", raidid, clabel->column);
1045 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1046 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1047 printf("raid%d: Status: %d\n", raidid, clabel->status);
1048 #endif
1049 clabel->row = 0;
1050 column = clabel->column;
1051
1052 if ((column < 0) || (column >= raidPtr->numCol)) {
1053 return(EINVAL);
1054 }
1055
1056 /* XXX this isn't allowed to do anything for now :-) */
1057
1058 /* XXX and before it is, we need to fill in the rest
1059 of the fields!?!?!?! */
1060 #if 0
1061 raidwrite_component_label(
1062 raidPtr->Disks[column].dev,
1063 raidPtr->raid_cinfo[column].ci_vp,
1064 clabel );
1065 #endif
1066 return (0);
1067
1068 case RAIDFRAME_INIT_LABELS:
1069 clabel = (RF_ComponentLabel_t *) data;
1070 /*
1071 we only want the serial number from
1072 the above. We get all the rest of the information
1073 from the config that was used to create this RAID
1074 set.
1075 */
1076
1077 raidPtr->serial_number = clabel->serial_number;
1078
1079 raid_init_component_label(raidPtr, &ci_label);
1080 ci_label.serial_number = clabel->serial_number;
1081 ci_label.row = 0; /* we dont' pretend to support more */
1082
1083 for(column=0;column<raidPtr->numCol;column++) {
1084 diskPtr = &raidPtr->Disks[column];
1085 if (!RF_DEAD_DISK(diskPtr->status)) {
1086 ci_label.partitionSize = diskPtr->partitionSize;
1087 ci_label.column = column;
1088 raidwrite_component_label(
1089 raidPtr->Disks[column].dev,
1090 raidPtr->raid_cinfo[column].ci_vp,
1091 &ci_label );
1092 }
1093 }
1094
1095 return (retcode);
1096 case RAIDFRAME_SET_AUTOCONFIG:
1097 d = rf_set_autoconfig(raidPtr, *(int *) data);
1098 printf("raid%d: New autoconfig value is: %d\n",
1099 raidPtr->raidid, d);
1100 *(int *) data = d;
1101 return (retcode);
1102
1103 case RAIDFRAME_SET_ROOT:
1104 d = rf_set_rootpartition(raidPtr, *(int *) data);
1105 printf("raid%d: New rootpartition value is: %d\n",
1106 raidPtr->raidid, d);
1107 *(int *) data = d;
1108 return (retcode);
1109
1110 /* initialize all parity */
1111 case RAIDFRAME_REWRITEPARITY:
1112
1113 if (raidPtr->Layout.map->faultsTolerated == 0) {
1114 /* Parity for RAID 0 is trivially correct */
1115 raidPtr->parity_good = RF_RAID_CLEAN;
1116 return(0);
1117 }
1118
1119 if (raidPtr->parity_rewrite_in_progress == 1) {
1120 /* Re-write is already in progress! */
1121 return(EINVAL);
1122 }
1123
1124 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1125 rf_RewriteParityThread,
1126 raidPtr,"raid_parity");
1127 return (retcode);
1128
1129
1130 case RAIDFRAME_ADD_HOT_SPARE:
1131 sparePtr = (RF_SingleComponent_t *) data;
1132 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1133 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1134 return(retcode);
1135
1136 case RAIDFRAME_REMOVE_HOT_SPARE:
1137 return(retcode);
1138
1139 case RAIDFRAME_DELETE_COMPONENT:
1140 componentPtr = (RF_SingleComponent_t *)data;
1141 memcpy( &component, componentPtr,
1142 sizeof(RF_SingleComponent_t));
1143 retcode = rf_delete_component(raidPtr, &component);
1144 return(retcode);
1145
1146 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1147 componentPtr = (RF_SingleComponent_t *)data;
1148 memcpy( &component, componentPtr,
1149 sizeof(RF_SingleComponent_t));
1150 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1151 return(retcode);
1152
1153 case RAIDFRAME_REBUILD_IN_PLACE:
1154
1155 if (raidPtr->Layout.map->faultsTolerated == 0) {
1156 /* Can't do this on a RAID 0!! */
1157 return(EINVAL);
1158 }
1159
1160 if (raidPtr->recon_in_progress == 1) {
1161 /* a reconstruct is already in progress! */
1162 return(EINVAL);
1163 }
1164
1165 componentPtr = (RF_SingleComponent_t *) data;
1166 memcpy( &component, componentPtr,
1167 sizeof(RF_SingleComponent_t));
1168 component.row = 0; /* we don't support any more */
1169 column = component.column;
1170
1171 if ((column < 0) || (column >= raidPtr->numCol)) {
1172 return(EINVAL);
1173 }
1174
1175 RF_LOCK_MUTEX(raidPtr->mutex);
1176 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1177 (raidPtr->numFailures > 0)) {
1178 /* XXX 0 above shouldn't be constant!!! */
1179 /* some component other than this has failed.
1180 Let's not make things worse than they already
1181 are... */
1182 printf("raid%d: Unable to reconstruct to disk at:\n",
1183 raidPtr->raidid);
1184 printf("raid%d: Col: %d Too many failures.\n",
1185 raidPtr->raidid, column);
1186 RF_UNLOCK_MUTEX(raidPtr->mutex);
1187 return (EINVAL);
1188 }
1189 if (raidPtr->Disks[column].status ==
1190 rf_ds_reconstructing) {
1191 printf("raid%d: Unable to reconstruct to disk at:\n",
1192 raidPtr->raidid);
1193 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1194
1195 RF_UNLOCK_MUTEX(raidPtr->mutex);
1196 return (EINVAL);
1197 }
1198 if (raidPtr->Disks[column].status == rf_ds_spared) {
1199 RF_UNLOCK_MUTEX(raidPtr->mutex);
1200 return (EINVAL);
1201 }
1202 RF_UNLOCK_MUTEX(raidPtr->mutex);
1203
1204 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1205 if (rrcopy == NULL)
1206 return(ENOMEM);
1207
1208 rrcopy->raidPtr = (void *) raidPtr;
1209 rrcopy->col = column;
1210
1211 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1212 rf_ReconstructInPlaceThread,
1213 rrcopy,"raid_reconip");
1214 return(retcode);
1215
1216 case RAIDFRAME_GET_INFO:
1217 if (!raidPtr->valid)
1218 return (ENODEV);
1219 ucfgp = (RF_DeviceConfig_t **) data;
1220 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1221 (RF_DeviceConfig_t *));
1222 if (d_cfg == NULL)
1223 return (ENOMEM);
1224 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1225 d_cfg->rows = 1; /* there is only 1 row now */
1226 d_cfg->cols = raidPtr->numCol;
1227 d_cfg->ndevs = raidPtr->numCol;
1228 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1229 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1230 return (ENOMEM);
1231 }
1232 d_cfg->nspares = raidPtr->numSpare;
1233 if (d_cfg->nspares >= RF_MAX_DISKS) {
1234 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1235 return (ENOMEM);
1236 }
1237 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1238 d = 0;
1239 for (j = 0; j < d_cfg->cols; j++) {
1240 d_cfg->devs[d] = raidPtr->Disks[j];
1241 d++;
1242 }
1243 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1244 d_cfg->spares[i] = raidPtr->Disks[j];
1245 }
1246 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1247 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1248
1249 return (retcode);
1250
1251 case RAIDFRAME_CHECK_PARITY:
1252 *(int *) data = raidPtr->parity_good;
1253 return (0);
1254
1255 case RAIDFRAME_RESET_ACCTOTALS:
1256 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1257 return (0);
1258
1259 case RAIDFRAME_GET_ACCTOTALS:
1260 totals = (RF_AccTotals_t *) data;
1261 *totals = raidPtr->acc_totals;
1262 return (0);
1263
1264 case RAIDFRAME_KEEP_ACCTOTALS:
1265 raidPtr->keep_acc_totals = *(int *)data;
1266 return (0);
1267
1268 case RAIDFRAME_GET_SIZE:
1269 *(int *) data = raidPtr->totalSectors;
1270 return (0);
1271
1272 /* fail a disk & optionally start reconstruction */
1273 case RAIDFRAME_FAIL_DISK:
1274
1275 if (raidPtr->Layout.map->faultsTolerated == 0) {
1276 /* Can't do this on a RAID 0!! */
1277 return(EINVAL);
1278 }
1279
1280 rr = (struct rf_recon_req *) data;
1281 rr->row = 0;
1282 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1283 return (EINVAL);
1284
1285
1286 RF_LOCK_MUTEX(raidPtr->mutex);
1287 if ((raidPtr->Disks[rr->col].status ==
1288 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1289 /* some other component has failed. Let's not make
1290 things worse. XXX wrong for RAID6 */
1291 RF_UNLOCK_MUTEX(raidPtr->mutex);
1292 return (EINVAL);
1293 }
1294 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1295 /* Can't fail a spared disk! */
1296 RF_UNLOCK_MUTEX(raidPtr->mutex);
1297 return (EINVAL);
1298 }
1299 RF_UNLOCK_MUTEX(raidPtr->mutex);
1300
1301 /* make a copy of the recon request so that we don't rely on
1302 * the user's buffer */
1303 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1304 if (rrcopy == NULL)
1305 return(ENOMEM);
1306 memcpy(rrcopy, rr, sizeof(*rr));
1307 rrcopy->raidPtr = (void *) raidPtr;
1308
1309 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1310 rf_ReconThread,
1311 rrcopy,"raid_recon");
1312 return (0);
1313
1314 /* invoke a copyback operation after recon on whatever disk
1315 * needs it, if any */
1316 case RAIDFRAME_COPYBACK:
1317
1318 if (raidPtr->Layout.map->faultsTolerated == 0) {
1319 /* This makes no sense on a RAID 0!! */
1320 return(EINVAL);
1321 }
1322
1323 if (raidPtr->copyback_in_progress == 1) {
1324 /* Copyback is already in progress! */
1325 return(EINVAL);
1326 }
1327
1328 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1329 rf_CopybackThread,
1330 raidPtr,"raid_copyback");
1331 return (retcode);
1332
1333 /* return the percentage completion of reconstruction */
1334 case RAIDFRAME_CHECK_RECON_STATUS:
1335 if (raidPtr->Layout.map->faultsTolerated == 0) {
1336 /* This makes no sense on a RAID 0, so tell the
1337 user it's done. */
1338 *(int *) data = 100;
1339 return(0);
1340 }
1341 if (raidPtr->status != rf_rs_reconstructing)
1342 *(int *) data = 100;
1343 else {
1344 if (raidPtr->reconControl->numRUsTotal > 0) {
1345 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1346 } else {
1347 *(int *) data = 0;
1348 }
1349 }
1350 return (0);
1351 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1352 progressInfoPtr = (RF_ProgressInfo_t **) data;
1353 if (raidPtr->status != rf_rs_reconstructing) {
1354 progressInfo.remaining = 0;
1355 progressInfo.completed = 100;
1356 progressInfo.total = 100;
1357 } else {
1358 progressInfo.total =
1359 raidPtr->reconControl->numRUsTotal;
1360 progressInfo.completed =
1361 raidPtr->reconControl->numRUsComplete;
1362 progressInfo.remaining = progressInfo.total -
1363 progressInfo.completed;
1364 }
1365 retcode = copyout(&progressInfo, *progressInfoPtr,
1366 sizeof(RF_ProgressInfo_t));
1367 return (retcode);
1368
1369 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1370 if (raidPtr->Layout.map->faultsTolerated == 0) {
1371 /* This makes no sense on a RAID 0, so tell the
1372 user it's done. */
1373 *(int *) data = 100;
1374 return(0);
1375 }
1376 if (raidPtr->parity_rewrite_in_progress == 1) {
1377 *(int *) data = 100 *
1378 raidPtr->parity_rewrite_stripes_done /
1379 raidPtr->Layout.numStripe;
1380 } else {
1381 *(int *) data = 100;
1382 }
1383 return (0);
1384
1385 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1386 progressInfoPtr = (RF_ProgressInfo_t **) data;
1387 if (raidPtr->parity_rewrite_in_progress == 1) {
1388 progressInfo.total = raidPtr->Layout.numStripe;
1389 progressInfo.completed =
1390 raidPtr->parity_rewrite_stripes_done;
1391 progressInfo.remaining = progressInfo.total -
1392 progressInfo.completed;
1393 } else {
1394 progressInfo.remaining = 0;
1395 progressInfo.completed = 100;
1396 progressInfo.total = 100;
1397 }
1398 retcode = copyout(&progressInfo, *progressInfoPtr,
1399 sizeof(RF_ProgressInfo_t));
1400 return (retcode);
1401
1402 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1403 if (raidPtr->Layout.map->faultsTolerated == 0) {
1404 /* This makes no sense on a RAID 0 */
1405 *(int *) data = 100;
1406 return(0);
1407 }
1408 if (raidPtr->copyback_in_progress == 1) {
1409 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1410 raidPtr->Layout.numStripe;
1411 } else {
1412 *(int *) data = 100;
1413 }
1414 return (0);
1415
1416 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1417 progressInfoPtr = (RF_ProgressInfo_t **) data;
1418 if (raidPtr->copyback_in_progress == 1) {
1419 progressInfo.total = raidPtr->Layout.numStripe;
1420 progressInfo.completed =
1421 raidPtr->copyback_stripes_done;
1422 progressInfo.remaining = progressInfo.total -
1423 progressInfo.completed;
1424 } else {
1425 progressInfo.remaining = 0;
1426 progressInfo.completed = 100;
1427 progressInfo.total = 100;
1428 }
1429 retcode = copyout(&progressInfo, *progressInfoPtr,
1430 sizeof(RF_ProgressInfo_t));
1431 return (retcode);
1432
1433 /* the sparetable daemon calls this to wait for the kernel to
1434 * need a spare table. this ioctl does not return until a
1435 * spare table is needed. XXX -- calling mpsleep here in the
1436 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1437 * -- I should either compute the spare table in the kernel,
1438 * or have a different -- XXX XXX -- interface (a different
1439 * character device) for delivering the table -- XXX */
1440 #if 0
1441 case RAIDFRAME_SPARET_WAIT:
1442 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1443 while (!rf_sparet_wait_queue)
1444 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1445 waitreq = rf_sparet_wait_queue;
1446 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1447 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1448
1449 /* structure assignment */
1450 *((RF_SparetWait_t *) data) = *waitreq;
1451
1452 RF_Free(waitreq, sizeof(*waitreq));
1453 return (0);
1454
1455 /* wakes up a process waiting on SPARET_WAIT and puts an error
1456 * code in it that will cause the dameon to exit */
1457 case RAIDFRAME_ABORT_SPARET_WAIT:
1458 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1459 waitreq->fcol = -1;
1460 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1461 waitreq->next = rf_sparet_wait_queue;
1462 rf_sparet_wait_queue = waitreq;
1463 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1464 wakeup(&rf_sparet_wait_queue);
1465 return (0);
1466
1467 /* used by the spare table daemon to deliver a spare table
1468 * into the kernel */
1469 case RAIDFRAME_SEND_SPARET:
1470
1471 /* install the spare table */
1472 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1473
1474 /* respond to the requestor. the return status of the spare
1475 * table installation is passed in the "fcol" field */
1476 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1477 waitreq->fcol = retcode;
1478 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1479 waitreq->next = rf_sparet_resp_queue;
1480 rf_sparet_resp_queue = waitreq;
1481 wakeup(&rf_sparet_resp_queue);
1482 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1483
1484 return (retcode);
1485 #endif
1486
1487 default:
1488 break; /* fall through to the os-specific code below */
1489
1490 }
1491
1492 if (!raidPtr->valid)
1493 return (EINVAL);
1494
1495 /*
1496 * Add support for "regular" device ioctls here.
1497 */
1498
1499 switch (cmd) {
1500 case DIOCGDINFO:
1501 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1502 break;
1503 #ifdef __HAVE_OLD_DISKLABEL
1504 case ODIOCGDINFO:
1505 newlabel = *(rs->sc_dkdev.dk_label);
1506 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1507 return ENOTTY;
1508 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1509 break;
1510 #endif
1511
1512 case DIOCGPART:
1513 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1514 ((struct partinfo *) data)->part =
1515 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1516 break;
1517
1518 case DIOCWDINFO:
1519 case DIOCSDINFO:
1520 #ifdef __HAVE_OLD_DISKLABEL
1521 case ODIOCWDINFO:
1522 case ODIOCSDINFO:
1523 #endif
1524 {
1525 struct disklabel *lp;
1526 #ifdef __HAVE_OLD_DISKLABEL
1527 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1528 memset(&newlabel, 0, sizeof newlabel);
1529 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1530 lp = &newlabel;
1531 } else
1532 #endif
1533 lp = (struct disklabel *)data;
1534
1535 if ((error = raidlock(rs)) != 0)
1536 return (error);
1537
1538 rs->sc_flags |= RAIDF_LABELLING;
1539
1540 error = setdisklabel(rs->sc_dkdev.dk_label,
1541 lp, 0, rs->sc_dkdev.dk_cpulabel);
1542 if (error == 0) {
1543 if (cmd == DIOCWDINFO
1544 #ifdef __HAVE_OLD_DISKLABEL
1545 || cmd == ODIOCWDINFO
1546 #endif
1547 )
1548 error = writedisklabel(RAIDLABELDEV(dev),
1549 raidstrategy, rs->sc_dkdev.dk_label,
1550 rs->sc_dkdev.dk_cpulabel);
1551 }
1552 rs->sc_flags &= ~RAIDF_LABELLING;
1553
1554 raidunlock(rs);
1555
1556 if (error)
1557 return (error);
1558 break;
1559 }
1560
1561 case DIOCWLABEL:
1562 if (*(int *) data != 0)
1563 rs->sc_flags |= RAIDF_WLABEL;
1564 else
1565 rs->sc_flags &= ~RAIDF_WLABEL;
1566 break;
1567
1568 case DIOCGDEFLABEL:
1569 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1570 break;
1571
1572 #ifdef __HAVE_OLD_DISKLABEL
1573 case ODIOCGDEFLABEL:
1574 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1575 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1576 return ENOTTY;
1577 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1578 break;
1579 #endif
1580
1581 default:
1582 retcode = ENOTTY;
1583 }
1584 return (retcode);
1585
1586 }
1587
1588
1589 /* raidinit -- complete the rest of the initialization for the
1590 RAIDframe device. */
1591
1592
1593 static void
1594 raidinit(RF_Raid_t *raidPtr)
1595 {
1596 struct raid_softc *rs;
1597 int unit;
1598
1599 unit = raidPtr->raidid;
1600
1601 rs = &raid_softc[unit];
1602
1603 /* XXX should check return code first... */
1604 rs->sc_flags |= RAIDF_INITED;
1605
1606 /* XXX doesn't check bounds. */
1607 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1608
1609 rs->sc_dkdev.dk_name = rs->sc_xname;
1610
1611 /* disk_attach actually creates space for the CPU disklabel, among
1612 * other things, so it's critical to call this *BEFORE* we try putzing
1613 * with disklabels. */
1614
1615 disk_attach(&rs->sc_dkdev);
1616
1617 /* XXX There may be a weird interaction here between this, and
1618 * protectedSectors, as used in RAIDframe. */
1619
1620 rs->sc_size = raidPtr->totalSectors;
1621 }
1622 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1623 /* wake up the daemon & tell it to get us a spare table
1624 * XXX
1625 * the entries in the queues should be tagged with the raidPtr
1626 * so that in the extremely rare case that two recons happen at once,
1627 * we know for which device were requesting a spare table
1628 * XXX
1629 *
1630 * XXX This code is not currently used. GO
1631 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	/* Hand 'req' to the user-space sparetable daemon (via the global
	 * rf_sparet_wait_queue) and sleep until a response appears on
	 * rf_sparet_resp_queue.  Returns the 'fcol' status from the
	 * response.  Note the response is a *different* RF_SparetWait_t
	 * than the one passed in; this routine frees the response. */
	int retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	/* Push the request and wake the daemon blocked in
	 * RAIDFRAME_SPARET_WAIT. */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): an older comment here claimed "mpsleep unlocks the
	 * mutex", but tsleep() as called below is not given the mutex to
	 * drop, so rf_sparet_wait_mutex appears to stay held across the
	 * sleep -- verify this is intended. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	/* Pop the response off the queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The install status is passed back in the "fcol" field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1656 #endif
1657
1658 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1659 * bp & passes it down.
1660 * any calls originating in the kernel must use non-blocking I/O
1661 * do some extra sanity checking to return "appropriate" error values for
1662 * certain conditions (to make some standard utilities work)
1663 *
1664 * Formerly known as: rf_DoAccessKernel
1665 */
void
raidstart(RF_Raid_t *raidPtr)
{
	/* Drain the softc's buffer queue into RAIDframe via rf_DoAccess(),
	 * as long as raidPtr->openings allows.  All I/O is issued
	 * non-blocking; completion happens via the raidio thread. */
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex around the label update, which does I/O */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Invariant: the mutex is held when the loop condition is tested,
	 * and re-taken at the bottom of every path through the body. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a possible partial trailing sector */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Fail requests that run off the end of the array, or whose
		 * address arithmetic wrapped (sum smaller than an addend). */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that aren't a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this access. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess() failed outright -- no completion
			 * callback will fire, so finish the buf here. */
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1789
1790
1791
1792
1793 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1794
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* Turn a RAIDframe disk-queue request into an actual component I/O:
	 * allocate a raidbuf from the pool, initialize it via InitBP(), and
	 * push it down with VOP_STRATEGY().  Completion lands in
	 * KernelWakeupFunc().  Returns 0, or ENOMEM if no raidbuf could be
	 * allocated. */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;

	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* Clear any stale error state on the incoming buf. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT);
	if (raidbp == NULL) {
		/* No memory for a component buf; fail the request. */
		bp->b_flags |= B_ERROR;
		bp->b_error = ENOMEM;
		return (ENOMEM);
	}
	BUF_INIT(&raidbp->rf_buf);

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	BIO_COPYPRIO(&raidbp->rf_buf, bp);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): printf(("...")) -- the extra parens suggest
		 * this was once db1_printf; confirm it should be
		 * unconditional. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* Complete immediately through the normal callback path. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the component buf; KernelWakeupFunc() is the
		 * b_iodone callback. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		       op | bp->b_flags, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's output counter. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1900 /* this is the callback function associated with a I/O invoked from
1901 kernel code.
1902 */
static void
KernelWakeupFunc(struct buf *vbp)
{
	/* b_iodone callback for component I/O issued by
	 * rf_DispatchKernelIO().  Propagates errors to the original buf,
	 * marks a failing component dead (once), returns the raidbuf to its
	 * pool, and hands the request to the raidio thread via the iodone
	 * queue. */
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* Copy any component-level error into the original buf. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
			raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif
#if RF_ACC_TRACE > 0
	/* Account the physical I/O time in the access trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
				rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a label update in
			 * raidstart() */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Return the component buf to the pool it came from. */
	pool_put(&rf_pools.cbuf, raidbp);

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
1983
1984
1985
1986 /*
1987 * initialize a buf structure for doing an I/O in the kernel.
1988 */
1989 static void
1990 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1991 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf,
1992 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1993 struct proc *b_proc)
1994 {
1995 /* bp->b_flags = B_PHYS | rw_flag; */
1996 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1997 bp->b_bcount = numSect << logBytesPerSector;
1998 bp->b_bufsize = bp->b_bcount;
1999 bp->b_error = 0;
2000 bp->b_dev = dev;
2001 bp->b_data = buf;
2002 bp->b_blkno = startSect;
2003 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2004 if (bp->b_bcount == 0) {
2005 panic("bp->b_bcount is zero in InitBP!!");
2006 }
2007 bp->b_proc = b_proc;
2008 bp->b_iodone = cbFunc;
2009 bp->b_vp = b_vp;
2010
2011 }
2012
2013 static void
2014 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2015 struct disklabel *lp)
2016 {
2017 memset(lp, 0, sizeof(*lp));
2018
2019 /* fabricate a label... */
2020 lp->d_secperunit = raidPtr->totalSectors;
2021 lp->d_secsize = raidPtr->bytesPerSector;
2022 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2023 lp->d_ntracks = 4 * raidPtr->numCol;
2024 lp->d_ncylinders = raidPtr->totalSectors /
2025 (lp->d_nsectors * lp->d_ntracks);
2026 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2027
2028 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2029 lp->d_type = DTYPE_RAID;
2030 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2031 lp->d_rpm = 3600;
2032 lp->d_interleave = 1;
2033 lp->d_flags = 0;
2034
2035 lp->d_partitions[RAW_PART].p_offset = 0;
2036 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2037 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2038 lp->d_npartitions = RAW_PART + 1;
2039
2040 lp->d_magic = DISKMAGIC;
2041 lp->d_magic2 = DISKMAGIC;
2042 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2043
2044 }
2045 /*
2046 * Read the disklabel from the raid device. If one is not present, fake one
2047 * up.
2048 */
static void
raidgetdisklabel(dev_t dev)
{
	/* Read the disklabel from the raid device into the softc's in-core
	 * label.  If none is present, fall back to a fabricated default
	 * label; if one is found, sanity-check it against the array size. */
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Seed lp with a fabricated default so readdisklabel() has sane
	 * geometry to start from. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No readable on-disk label: install the default one. */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if one is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* Warn about any partition that extends past the array. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ld)\n",
				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2101 /*
2102 * Take care of things one might want to take care of in the event
2103 * that a disklabel isn't present.
2104 */
2105 static void
2106 raidmakedisklabel(struct raid_softc *rs)
2107 {
2108 struct disklabel *lp = rs->sc_dkdev.dk_label;
2109 db1_printf(("Making a label..\n"));
2110
2111 /*
2112 * For historical reasons, if there's no disklabel present
2113 * the raw partition must be marked FS_BSDFFS.
2114 */
2115
2116 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2117
2118 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2119
2120 lp->d_checksum = dkcksum(lp);
2121 }
2122 /*
2123 * Lookup the provided name in the filesystem. If the file exists,
2124 * is a valid block device, and isn't being used by anyone else,
2125 * set *vpp to the file's vnode.
2126 * You'll find the original of this in ccd.c
2127 */
int
raidlookup(char *path, struct lwp *l, struct vnode **vpp)
{
	struct nameidata nd;
	struct vnode *vp;
	struct proc *p;
	struct vattr va;
	int error;

	/* NOTE(review): if l is NULL, p is NULL and every p->p_ucred use
	   below would fault; callers apparently always supply a valid
	   lwp -- confirm. */
	p = l ? l->l_proc : NULL;
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
		return (error);
	}
	vp = nd.ni_vp;
	/* Reject a device that anyone else already has open. */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, l)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
		return (ENOTBLK);
	}
	/* Success: hand back the (unlocked, still-open) vnode. */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
2163 /*
2164 * Wait interruptibly for an exclusive lock.
2165 *
2166 * XXX
2167 * Several drivers do this; it should be abstracted and made MP-safe.
2168 * (Hmm... where have we seen this warning before :-> GO )
2169 */
2170 static int
2171 raidlock(struct raid_softc *rs)
2172 {
2173 int error;
2174
2175 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2176 rs->sc_flags |= RAIDF_WANTED;
2177 if ((error =
2178 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2179 return (error);
2180 }
2181 rs->sc_flags |= RAIDF_LOCKED;
2182 return (0);
2183 }
2184 /*
2185 * Unlock and wake up any waiters.
2186 */
2187 static void
2188 raidunlock(struct raid_softc *rs)
2189 {
2190
2191 rs->sc_flags &= ~RAIDF_LOCKED;
2192 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2193 rs->sc_flags &= ~RAIDF_WANTED;
2194 wakeup(rs);
2195 }
2196 }
2197
2198
2199 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2200 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2201
2202 int
2203 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2204 {
2205 RF_ComponentLabel_t clabel;
2206 raidread_component_label(dev, b_vp, &clabel);
2207 clabel.mod_counter = mod_counter;
2208 clabel.clean = RF_RAID_CLEAN;
2209 raidwrite_component_label(dev, b_vp, &clabel);
2210 return(0);
2211 }
2212
2213
2214 int
2215 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2216 {
2217 RF_ComponentLabel_t clabel;
2218 raidread_component_label(dev, b_vp, &clabel);
2219 clabel.mod_counter = mod_counter;
2220 clabel.clean = RF_RAID_DIRTY;
2221 raidwrite_component_label(dev, b_vp, &clabel);
2222 return(0);
2223 }
2224
2225 /* ARGSUSED */
2226 int
2227 raidread_component_label(dev_t dev, struct vnode *b_vp,
2228 RF_ComponentLabel_t *clabel)
2229 {
2230 struct buf *bp;
2231 const struct bdevsw *bdev;
2232 int error;
2233
2234 /* XXX should probably ensure that we don't try to do this if
2235 someone has changed rf_protected_sectors. */
2236
2237 if (b_vp == NULL) {
2238 /* For whatever reason, this component is not valid.
2239 Don't try to read a component label from it. */
2240 return(EINVAL);
2241 }
2242
2243 /* get a block of the appropriate size... */
2244 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2245 bp->b_dev = dev;
2246
2247 /* get our ducks in a row for the read */
2248 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2249 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2250 bp->b_flags |= B_READ;
2251 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2252
2253 bdev = bdevsw_lookup(bp->b_dev);
2254 if (bdev == NULL)
2255 return (ENXIO);
2256 (*bdev->d_strategy)(bp);
2257
2258 error = biowait(bp);
2259
2260 if (!error) {
2261 memcpy(clabel, bp->b_data,
2262 sizeof(RF_ComponentLabel_t));
2263 }
2264
2265 brelse(bp);
2266 return(error);
2267 }
2268 /* ARGSUSED */
2269 int
2270 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2271 RF_ComponentLabel_t *clabel)
2272 {
2273 struct buf *bp;
2274 const struct bdevsw *bdev;
2275 int error;
2276
2277 /* get a block of the appropriate size... */
2278 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2279 bp->b_dev = dev;
2280
2281 /* get our ducks in a row for the write */
2282 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2283 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2284 bp->b_flags |= B_WRITE;
2285 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2286
2287 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2288
2289 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2290
2291 bdev = bdevsw_lookup(bp->b_dev);
2292 if (bdev == NULL)
2293 return (ENXIO);
2294 (*bdev->d_strategy)(bp);
2295 error = biowait(bp);
2296 brelse(bp);
2297 if (error) {
2298 #if 1
2299 printf("Failed to write RAID component info!\n");
2300 #endif
2301 }
2302
2303 return(error);
2304 }
2305
/*
 * Bump the array's modification counter and mark the component label
 * of every non-failed component (and every in-use spare) dirty, so a
 * crash can later be distinguished from a clean shutdown.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
				      raidPtr->Disks[c].dev,
				      raidPtr->raid_cinfo[c].ci_vp,
				      raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* NOTE(review): the label read above is immediately
			   overwritten here; presumably only the read's side
			   effects matter -- confirm. */
			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2376
2377
/*
 * Rewrite the component labels of all optimal components and in-use
 * spares with a freshly bumped mod_counter.  When final is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, additionally
 * mark the labels clean (this is the shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[c].dev,
						      raidPtr->raid_cinfo[c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2468
2469 void
2470 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2471 {
2472 struct proc *p;
2473 struct lwp *l;
2474
2475 p = raidPtr->engine_thread;
2476 l = LIST_FIRST(&p->p_lwps);
2477
2478 if (vp != NULL) {
2479 if (auto_configured == 1) {
2480 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2481 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2482 vput(vp);
2483
2484 } else {
2485 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2486 }
2487 }
2488 }
2489
2490
2491 void
2492 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2493 {
2494 int r,c;
2495 struct vnode *vp;
2496 int acd;
2497
2498
2499 /* We take this opportunity to close the vnodes like we should.. */
2500
2501 for (c = 0; c < raidPtr->numCol; c++) {
2502 vp = raidPtr->raid_cinfo[c].ci_vp;
2503 acd = raidPtr->Disks[c].auto_configured;
2504 rf_close_component(raidPtr, vp, acd);
2505 raidPtr->raid_cinfo[c].ci_vp = NULL;
2506 raidPtr->Disks[c].auto_configured = 0;
2507 }
2508
2509 for (r = 0; r < raidPtr->numSpare; r++) {
2510 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2511 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2512 rf_close_component(raidPtr, vp, acd);
2513 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2514 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2515 }
2516 }
2517
2518
/*
 * Kernel-thread entry point: fail the component named in req and
 * (optionally, per RF_FDFLAGS_RECON) reconstruct it onto a spare.
 * Frees req and exits the thread when done.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	/* block bio interrupts for the duration of the operation */
	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* the request was allocated by our caller; we own and free it */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2540
/*
 * Kernel-thread entry point: rewrite all parity on the array, then
 * mark parity good on success.  Wakes any shutdown waiter and exits
 * the thread when done.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2569
2570
2571 void
2572 rf_CopybackThread(RF_Raid_t *raidPtr)
2573 {
2574 int s;
2575
2576 raidPtr->copyback_in_progress = 1;
2577 s = splbio();
2578 rf_CopybackReconstructedData(raidPtr);
2579 splx(s);
2580 raidPtr->copyback_in_progress = 0;
2581
2582 /* That's all... */
2583 kthread_exit(0); /* does not return */
2584 }
2585
2586
/*
 * Kernel-thread entry point: reconstruct the data of component
 * req->col in place (onto the same disk).  Frees req and exits the
 * thread when done.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* the request was allocated by our caller; we own and free it */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2604
/*
 * Scan every disk-class device in the system for partitions of type
 * FS_RAID, read the RAIDframe component label from each, and return a
 * linked list of RF_AutoConfig_t entries for the labels that look
 * reasonable.  Vnodes for accepted components are left open; rejected
 * ones are closed again.  Returns NULL on allocation failure.
 */
RF_AutoConfig_t *
rf_find_raid_components()
{
	struct vnode *vp;
	struct disklabel label;
	struct device *dv;
	dev_t dev;
	int bmajor;
	int error;
	int i;
	int good_one;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac_list;
	RF_AutoConfig_t *ac;


	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = alldevs.tqh_first; dv != NULL;
	     dv = dv->dv_list.tqe_next) {

		/* we are only interested in disks... */
		if (dv->dv_class != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
			continue;
		}
		/* fdisa is the Atari/Milan floppy driver */
		if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);

		/* get a vnode for the raw partition of this disk */

		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED, 0);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				       "%s (%d)\n", dv->dv_xname, error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		/* NOTE(review): opened with FREAD only but closed with
		   FREAD | FWRITE -- looks asymmetric; confirm intended. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);

		if (error)
			continue;

		/* now probe each RAID-typed partition on this disk */
		for (i=0; i < label.d_npartitions; i++) {
			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}

			good_one = 0;

			clabel = (RF_ComponentLabel_t *)
				malloc(sizeof(RF_ComponentLabel_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (clabel == NULL) {
				/* XXX CLEANUP HERE */
				printf("RAID auto config: out of memory!\n");
				return(NULL); /* XXX probably should panic? */
			}

			if (!raidread_component_label(dev, vp, clabel)) {
				/* Got the label.  Does it look reasonable? */
				if (rf_reasonable_label(clabel) &&
				    (clabel->partitionSize <=
				     label.d_partitions[i].p_size)) {
#if DEBUG
					printf("Component on: %s%c: %d\n",
					       dv->dv_xname, 'a'+i,
					       label.d_partitions[i].p_size);
					rf_print_component_label(clabel);
#endif
					/* if it's reasonable, add it,
					   else ignore it. */
					ac = (RF_AutoConfig_t *)
						malloc(sizeof(RF_AutoConfig_t),
						       M_RAIDFRAME,
						       M_NOWAIT);
					if (ac == NULL) {
						/* XXX should panic?? */
						return(NULL);
					}

					snprintf(ac->devname,
					    sizeof(ac->devname), "%s%c",
					    dv->dv_xname, 'a'+i);
					ac->dev = dev;
					ac->vp = vp;	/* kept open for later use */
					ac->clabel = clabel;
					ac->next = ac_list;
					ac_list = ac;
					good_one = 1;
				}
			}
			if (!good_one) {
				/* cleanup */
				free(clabel, M_RAIDFRAME);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
				vput(vp);
			}
		}
	}
	return(ac_list);
}
2762
2763 static int
2764 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2765 {
2766
2767 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2768 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2769 ((clabel->clean == RF_RAID_CLEAN) ||
2770 (clabel->clean == RF_RAID_DIRTY)) &&
2771 clabel->row >=0 &&
2772 clabel->column >= 0 &&
2773 clabel->num_rows > 0 &&
2774 clabel->num_columns > 0 &&
2775 clabel->row < clabel->num_rows &&
2776 clabel->column < clabel->num_columns &&
2777 clabel->blockSize > 0 &&
2778 clabel->numBlocks > 0) {
2779 /* label looks reasonable enough... */
2780 return(1);
2781 }
2782 return(0);
2783 }
2784
2785
#if DEBUG
/*
 * Dump the interesting fields of a component label to the console.
 * Debug builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2813
2814 RF_ConfigSet_t *
2815 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2816 {
2817 RF_AutoConfig_t *ac;
2818 RF_ConfigSet_t *config_sets;
2819 RF_ConfigSet_t *cset;
2820 RF_AutoConfig_t *ac_next;
2821
2822
2823 config_sets = NULL;
2824
2825 /* Go through the AutoConfig list, and figure out which components
2826 belong to what sets. */
2827 ac = ac_list;
2828 while(ac!=NULL) {
2829 /* we're going to putz with ac->next, so save it here
2830 for use at the end of the loop */
2831 ac_next = ac->next;
2832
2833 if (config_sets == NULL) {
2834 /* will need at least this one... */
2835 config_sets = (RF_ConfigSet_t *)
2836 malloc(sizeof(RF_ConfigSet_t),
2837 M_RAIDFRAME, M_NOWAIT);
2838 if (config_sets == NULL) {
2839 panic("rf_create_auto_sets: No memory!");
2840 }
2841 /* this one is easy :) */
2842 config_sets->ac = ac;
2843 config_sets->next = NULL;
2844 config_sets->rootable = 0;
2845 ac->next = NULL;
2846 } else {
2847 /* which set does this component fit into? */
2848 cset = config_sets;
2849 while(cset!=NULL) {
2850 if (rf_does_it_fit(cset, ac)) {
2851 /* looks like it matches... */
2852 ac->next = cset->ac;
2853 cset->ac = ac;
2854 break;
2855 }
2856 cset = cset->next;
2857 }
2858 if (cset==NULL) {
2859 /* didn't find a match above... new set..*/
2860 cset = (RF_ConfigSet_t *)
2861 malloc(sizeof(RF_ConfigSet_t),
2862 M_RAIDFRAME, M_NOWAIT);
2863 if (cset == NULL) {
2864 panic("rf_create_auto_sets: No memory!");
2865 }
2866 cset->ac = ac;
2867 ac->next = NULL;
2868 cset->next = config_sets;
2869 cset->rootable = 0;
2870 config_sets = cset;
2871 }
2872 }
2873 ac = ac_next;
2874 }
2875
2876
2877 return(config_sets);
2878 }
2879
2880 static int
2881 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2882 {
2883 RF_ComponentLabel_t *clabel1, *clabel2;
2884
2885 /* If this one matches the *first* one in the set, that's good
2886 enough, since the other members of the set would have been
2887 through here too... */
2888 /* note that we are not checking partitionSize here..
2889
2890 Note that we are also not checking the mod_counters here.
2891 If everything else matches execpt the mod_counter, that's
2892 good enough for this test. We will deal with the mod_counters
2893 a little later in the autoconfiguration process.
2894
2895 (clabel1->mod_counter == clabel2->mod_counter) &&
2896
2897 The reason we don't check for this is that failed disks
2898 will have lower modification counts. If those disks are
2899 not added to the set they used to belong to, then they will
2900 form their own set, which may result in 2 different sets,
2901 for example, competing to be configured at raid0, and
2902 perhaps competing to be the root filesystem set. If the
2903 wrong ones get configured, or both attempt to become /,
2904 weird behaviour and or serious lossage will occur. Thus we
2905 need to bring them into the fold here, and kick them out at
2906 a later point.
2907
2908 */
2909
2910 clabel1 = cset->ac->clabel;
2911 clabel2 = ac->clabel;
2912 if ((clabel1->version == clabel2->version) &&
2913 (clabel1->serial_number == clabel2->serial_number) &&
2914 (clabel1->num_rows == clabel2->num_rows) &&
2915 (clabel1->num_columns == clabel2->num_columns) &&
2916 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2917 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2918 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2919 (clabel1->parityConfig == clabel2->parityConfig) &&
2920 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2921 (clabel1->blockSize == clabel2->blockSize) &&
2922 (clabel1->numBlocks == clabel2->numBlocks) &&
2923 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2924 (clabel1->root_partition == clabel2->root_partition) &&
2925 (clabel1->last_unit == clabel2->last_unit) &&
2926 (clabel1->config_order == clabel2->config_order)) {
2927 /* if it get's here, it almost *has* to be a match */
2928 } else {
2929 /* it's not consistent with somebody in the set..
2930 punt */
2931 return(0);
2932 }
2933 /* all was fine.. it must fit... */
2934 return(1);
2935 }
2936
/*
 * Decide whether the config set has enough live components (at the
 * set's highest mod_counter) to be configured.  Returns 1 if the set
 * is viable, 0 if too many components are missing for its RAID level.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the set's mod_counter is the maximum over its members; stale
	   (failed) components carry lower values */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* for each column, look for a current (max mod_counter) member */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3039
3040 void
3041 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3042 RF_Raid_t *raidPtr)
3043 {
3044 RF_ComponentLabel_t *clabel;
3045 int i;
3046
3047 clabel = ac->clabel;
3048
3049 /* 1. Fill in the common stuff */
3050 config->numRow = clabel->num_rows = 1;
3051 config->numCol = clabel->num_columns;
3052 config->numSpare = 0; /* XXX should this be set here? */
3053 config->sectPerSU = clabel->sectPerSU;
3054 config->SUsPerPU = clabel->SUsPerPU;
3055 config->SUsPerRU = clabel->SUsPerRU;
3056 config->parityConfig = clabel->parityConfig;
3057 /* XXX... */
3058 strcpy(config->diskQueueType,"fifo");
3059 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3060 config->layoutSpecificSize = 0; /* XXX ?? */
3061
3062 while(ac!=NULL) {
3063 /* row/col values will be in range due to the checks
3064 in reasonable_label() */
3065 strcpy(config->devnames[0][ac->clabel->column],
3066 ac->devname);
3067 ac = ac->next;
3068 }
3069
3070 for(i=0;i<RF_MAXDBGV;i++) {
3071 config->debugVars[i][0] = 0;
3072 }
3073 }
3074
3075 int
3076 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3077 {
3078 RF_ComponentLabel_t clabel;
3079 struct vnode *vp;
3080 dev_t dev;
3081 int column;
3082 int sparecol;
3083
3084 raidPtr->autoconfigure = new_value;
3085
3086 for(column=0; column<raidPtr->numCol; column++) {
3087 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3088 dev = raidPtr->Disks[column].dev;
3089 vp = raidPtr->raid_cinfo[column].ci_vp;
3090 raidread_component_label(dev, vp, &clabel);
3091 clabel.autoconfigure = new_value;
3092 raidwrite_component_label(dev, vp, &clabel);
3093 }
3094 }
3095 for(column = 0; column < raidPtr->numSpare ; column++) {
3096 sparecol = raidPtr->numCol + column;
3097 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3098 dev = raidPtr->Disks[sparecol].dev;
3099 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3100 raidread_component_label(dev, vp, &clabel);
3101 clabel.autoconfigure = new_value;
3102 raidwrite_component_label(dev, vp, &clabel);
3103 }
3104 }
3105 return(new_value);
3106 }
3107
3108 int
3109 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3110 {
3111 RF_ComponentLabel_t clabel;
3112 struct vnode *vp;
3113 dev_t dev;
3114 int column;
3115 int sparecol;
3116
3117 raidPtr->root_partition = new_value;
3118 for(column=0; column<raidPtr->numCol; column++) {
3119 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3120 dev = raidPtr->Disks[column].dev;
3121 vp = raidPtr->raid_cinfo[column].ci_vp;
3122 raidread_component_label(dev, vp, &clabel);
3123 clabel.root_partition = new_value;
3124 raidwrite_component_label(dev, vp, &clabel);
3125 }
3126 }
3127 for(column = 0; column < raidPtr->numSpare ; column++) {
3128 sparecol = raidPtr->numCol + column;
3129 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3130 dev = raidPtr->Disks[sparecol].dev;
3131 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3132 raidread_component_label(dev, vp, &clabel);
3133 clabel.root_partition = new_value;
3134 raidwrite_component_label(dev, vp, &clabel);
3135 }
3136 }
3137 return(new_value);
3138 }
3139
3140 void
3141 rf_release_all_vps(RF_ConfigSet_t *cset)
3142 {
3143 RF_AutoConfig_t *ac;
3144
3145 ac = cset->ac;
3146 while(ac!=NULL) {
3147 /* Close the vp, and give it back */
3148 if (ac->vp) {
3149 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3150 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3151 vput(ac->vp);
3152 ac->vp = NULL;
3153 }
3154 ac = ac->next;
3155 }
3156 }
3157
3158
3159 void
3160 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3161 {
3162 RF_AutoConfig_t *ac;
3163 RF_AutoConfig_t *next_ac;
3164
3165 ac = cset->ac;
3166 while(ac!=NULL) {
3167 next_ac = ac->next;
3168 /* nuke the label */
3169 free(ac->clabel, M_RAIDFRAME);
3170 /* cleanup the config structure */
3171 free(ac, M_RAIDFRAME);
3172 /* "next.." */
3173 ac = next_ac;
3174 }
3175 /* and, finally, nuke the config set */
3176 free(cset, M_RAIDFRAME);
3177 }
3178
3179
/*
 * Fill *clabel with the canonical label contents for the current
 * state of raidPtr.  Position fields (row/column) and mod_counter
 * overrides are left to the caller.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3207
3208 int
3209 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3210 {
3211 RF_Raid_t *raidPtr;
3212 RF_Config_t *config;
3213 int raidID;
3214 int retcode;
3215
3216 #if DEBUG
3217 printf("RAID autoconfigure\n");
3218 #endif
3219
3220 retcode = 0;
3221 *unit = -1;
3222
3223 /* 1. Create a config structure */
3224
3225 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3226 M_RAIDFRAME,
3227 M_NOWAIT);
3228 if (config==NULL) {
3229 printf("Out of mem!?!?\n");
3230 /* XXX do something more intelligent here. */
3231 return(1);
3232 }
3233
3234 memset(config, 0, sizeof(RF_Config_t));
3235
3236 /*
3237 2. Figure out what RAID ID this one is supposed to live at
3238 See if we can get the same RAID dev that it was configured
3239 on last time..
3240 */
3241
3242 raidID = cset->ac->clabel->last_unit;
3243 if ((raidID < 0) || (raidID >= numraid)) {
3244 /* let's not wander off into lala land. */
3245 raidID = numraid - 1;
3246 }
3247 if (raidPtrs[raidID]->valid != 0) {
3248
3249 /*
3250 Nope... Go looking for an alternative...
3251 Start high so we don't immediately use raid0 if that's
3252 not taken.
3253 */
3254
3255 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3256 if (raidPtrs[raidID]->valid == 0) {
3257 /* can use this one! */
3258 break;
3259 }
3260 }
3261 }
3262
3263 if (raidID < 0) {
3264 /* punt... */
3265 printf("Unable to auto configure this set!\n");
3266 printf("(Out of RAID devs!)\n");
3267 return(1);
3268 }
3269
3270 #if DEBUG
3271 printf("Configuring raid%d:\n",raidID);
3272 #endif
3273
3274 raidPtr = raidPtrs[raidID];
3275
3276 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3277 raidPtr->raidid = raidID;
3278 raidPtr->openings = RAIDOUTSTANDING;
3279
3280 /* 3. Build the configuration structure */
3281 rf_create_configuration(cset->ac, config, raidPtr);
3282
3283 /* 4. Do the configuration */
3284 retcode = rf_Configure(raidPtr, config, cset->ac);
3285
3286 if (retcode == 0) {
3287
3288 raidinit(raidPtrs[raidID]);
3289
3290 rf_markalldirty(raidPtrs[raidID]);
3291 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3292 if (cset->ac->clabel->root_partition==1) {
3293 /* everything configured just fine. Make a note
3294 that this set is eligible to be root. */
3295 cset->rootable = 1;
3296 /* XXX do this here? */
3297 raidPtrs[raidID]->root_partition = 1;
3298 }
3299 }
3300
3301 /* 5. Cleanup */
3302 free(config, M_RAIDFRAME);
3303
3304 *unit = raidID;
3305 return(retcode);
3306 }
3307
/*
 * Charge the completed transfer described by desc to the disk
 * statistics of its RAID unit.
 */
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	struct buf *bp;

	bp = (struct buf *)desc->bp;
	/* bytes actually moved = requested count minus residual */
	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
		    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
}
3317
/*
 * Initialize pool p for items of the given size, cap it at max items,
 * and pre-allocate (and keep at least) min items.
 */
void
rf_pool_init(struct pool *p, size_t size, char *w_chan,
    size_t min, size_t max)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	pool_sethiwat(p, max);
	/* prime before setting the low watermark so the items exist */
	pool_prime(p, min);
	pool_setlowat(p, min);
}
3327