rf_netbsdkintf.c revision 1.180 1 /* $NetBSD: rf_netbsdkintf.c,v 1.180 2004/07/01 17:48:45 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.180 2004/07/01 17:48:45 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/user.h>
169 #include <sys/reboot.h>
170
171 #include <dev/raidframe/raidframevar.h>
172 #include <dev/raidframe/raidframeio.h>
173 #include "raid.h"
174 #include "opt_raid_autoconfig.h"
175 #include "rf_raid.h"
176 #include "rf_copyback.h"
177 #include "rf_dag.h"
178 #include "rf_dagflags.h"
179 #include "rf_desc.h"
180 #include "rf_diskqueue.h"
181 #include "rf_etimer.h"
182 #include "rf_general.h"
183 #include "rf_kintf.h"
184 #include "rf_options.h"
185 #include "rf_driver.h"
186 #include "rf_parityscan.h"
187 #include "rf_threadstuff.h"
188
189 #ifdef DEBUG
190 int rf_kdebug_level = 0;
191 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
192 #else /* DEBUG */
193 #define db1_printf(a) { }
194 #endif /* DEBUG */
195
/* Per-unit RAIDframe descriptors; array of numraid entries allocated in
 * raidattach().  Entries are NULL until the unit is configured. */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Protects the two spare-table queues below. */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

/* Malloc type used for all driver allocations in this file. */
MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
206
/* prototypes */
/* I/O completion callback handed to InitBP() (defined later in file). */
static void KernelWakeupFunc(struct buf * bp);
/* Initialize a struct buf for a component I/O request. */
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
/* Attach the disk(9) structures for a freshly configured RAID set. */
static void raidinit(RF_Raid_t *);

void raidattach(int);

dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

/* Block-device switch entry: raid behaves as a disk (D_DISK). */
const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

/* Character-device switch entry (raw I/O goes through physio). */
const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};
236
237 /*
238 * Pilfered from ccd.c
239 */
240
/*
 * Per-component I/O wrapper, allocated from rf_pools.cbuf.  The embedded
 * struct buf must stay the first member so a (struct buf *) from the
 * completion path can be cast back to (struct raidbuf *).
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
246
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/* Per-unit pseudo-disk state; allocated as an array in raidattach(). */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number is encoded in the minor number, disklabel-style. */
#define	raidunit(x)	DISKUNIT(x)
int numraid = 0;		/* number of units set up by raidattach() */
268
269 /*
270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271 * Be aware that large numbers can allow the driver to consume a lot of
272 * kernel memory, especially on writes, and in degraded mode reads.
273 *
274 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275 * a single 64K write will typically require 64K for the old data,
276 * 64K for the old parity, and 64K for the new parity, for a total
277 * of 192K (if the parity buffer is not re-used immediately).
278 * Even it if is used immediately, that's still 128K, which when multiplied
279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
280 *
281 * Now in degraded mode, for example, a 64K read on the above setup may
282 * require data reconstruction, which will require *all* of the 4 remaining
283 * disks to participate -- 4 * 32K/disk == 128K again.
284 */
285
/* Default cap on simultaneous I/Os per RAID device; see the memory-use
 * discussion above.  May be overridden in the kernel config. */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* The device holding a unit's disklabel: the raw partition of that unit. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;
295
/* Disklabel helpers (defined later in this file). */
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				     struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* Unit lock/unlock, ccd-style (RAIDF_LOCKED/RAIDF_WANTED). */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

/* Mark all component labels of a set as dirty (unclean shutdown guard). */
static void rf_markalldirty(RF_Raid_t *);

/* Fake struct device array so an autoconfigured set can be booted_device. */
struct device *raidrootdev;

/* Kernel-thread entry points for long-running operations. */
void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(struct device *self);
void rf_buildroothack(RF_ConfigSet_t *);

/* Component-label autoconfiguration machinery. */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place.
				  Note that this is overridden by having
				  RAID_AUTOCONFIG as an option in the
				  kernel config file. */

/* Pools shared across all units (component buffers, etc.). */
struct RF_Pools_s rf_pools;
334
/*
 * Pseudo-device attach routine: allocate and initialize global state for
 * `num' RAID units, boot the RAIDframe core, and register a config
 * finalizer that will autoconfigure sets once real hardware is found.
 * Called once at boot; on allocation failure it either panics or bails
 * out early (leaving numraid describing how many units are usable).
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	/* Initialize the component buffer pool. */
	rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf),
		     "raidpl", num * RAIDOUTSTANDING,
		     2 * num * RAIDOUTSTANDING);

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	/* Fake device nodes so autoconfigured sets can serve as the root
	 * device (see rf_buildroothack()). */
	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);

		raidrootdev[raidID].dv_class  = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit   = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags  = 0;
		snprintf(raidrootdev[raidID].dv_xname,
		    sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Shrink numraid so later code never touches the
			 * units we failed to allocate. */
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
429
430 int
431 rf_autoconfig(struct device *self)
432 {
433 RF_AutoConfig_t *ac_list;
434 RF_ConfigSet_t *config_sets;
435
436 if (raidautoconfig == 0)
437 return (0);
438
439 /* XXX This code can only be run once. */
440 raidautoconfig = 0;
441
442 /* 1. locate all RAID components on the system */
443 #ifdef DEBUG
444 printf("Searching for RAID components...\n");
445 #endif
446 ac_list = rf_find_raid_components();
447
448 /* 2. Sort them into their respective sets. */
449 config_sets = rf_create_auto_sets(ac_list);
450
451 /*
452 * 3. Evaluate each set andconfigure the valid ones.
453 * This gets done in rf_buildroothack().
454 */
455 rf_buildroothack(config_sets);
456
457 return (1);
458 }
459
460 void
461 rf_buildroothack(RF_ConfigSet_t *config_sets)
462 {
463 RF_ConfigSet_t *cset;
464 RF_ConfigSet_t *next_cset;
465 int retcode;
466 int raidID;
467 int rootID;
468 int num_root;
469
470 rootID = 0;
471 num_root = 0;
472 cset = config_sets;
473 while(cset != NULL ) {
474 next_cset = cset->next;
475 if (rf_have_enough_components(cset) &&
476 cset->ac->clabel->autoconfigure==1) {
477 retcode = rf_auto_config_set(cset,&raidID);
478 if (!retcode) {
479 if (cset->rootable) {
480 rootID = raidID;
481 num_root++;
482 }
483 } else {
484 /* The autoconfig didn't work :( */
485 #if DEBUG
486 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
487 #endif
488 rf_release_all_vps(cset);
489 }
490 } else {
491 /* we're not autoconfiguring this set...
492 release the associated resources */
493 rf_release_all_vps(cset);
494 }
495 /* cleanup */
496 rf_cleanup_config_set(cset);
497 cset = next_cset;
498 }
499
500 /* we found something bootable... */
501
502 if (num_root == 1) {
503 booted_device = &raidrootdev[rootID];
504 } else if (num_root > 1) {
505 /* we can't guess.. require the user to answer... */
506 boothowto |= RB_ASKNAME;
507 }
508 }
509
510
511 int
512 raidsize(dev_t dev)
513 {
514 struct raid_softc *rs;
515 struct disklabel *lp;
516 int part, unit, omask, size;
517
518 unit = raidunit(dev);
519 if (unit >= numraid)
520 return (-1);
521 rs = &raid_softc[unit];
522
523 if ((rs->sc_flags & RAIDF_INITED) == 0)
524 return (-1);
525
526 part = DISKPART(dev);
527 omask = rs->sc_dkdev.dk_openmask & (1 << part);
528 lp = rs->sc_dkdev.dk_label;
529
530 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
531 return (-1);
532
533 if (lp->d_partitions[part].p_fstype != FS_SWAP)
534 size = -1;
535 else
536 size = lp->d_partitions[part].p_size *
537 (lp->d_secsize / DEV_BSIZE);
538
539 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
540 return (-1);
541
542 return (size);
543
544 }
545
/*
 * Crash-dump entry point (d_dump).  Dumping to a RAID set is not
 * implemented; always fail with ENXIO.
 */
int
raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{
	/* Not implemented. */
	return ENXIO;
}
552 /* ARGSUSED */
/*
 * Device open.  Validates the unit and partition, re-reads the disklabel
 * on the first open of a configured set, records the open in the char/
 * block open masks, and marks component labels dirty on first open so an
 * unclean shutdown can be detected later.  The unit lock is held across
 * the whole operation.
 */
int
raidopen(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
622 /* ARGSUSED */
623 int
624 raidclose(dev_t dev, int flags, int fmt, struct proc *p)
625 {
626 int unit = raidunit(dev);
627 struct raid_softc *rs;
628 int error = 0;
629 int part;
630
631 if (unit >= numraid)
632 return (ENXIO);
633 rs = &raid_softc[unit];
634
635 if ((error = raidlock(rs)) != 0)
636 return (error);
637
638 part = DISKPART(dev);
639
640 /* ...that much closer to allowing unconfiguration... */
641 switch (fmt) {
642 case S_IFCHR:
643 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
644 break;
645
646 case S_IFBLK:
647 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
648 break;
649 }
650 rs->sc_dkdev.dk_openmask =
651 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
652
653 if ((rs->sc_dkdev.dk_openmask == 0) &&
654 ((rs->sc_flags & RAIDF_INITED) != 0)) {
655 /* Last one... device is not unconfigured yet.
656 Device shutdown has taken care of setting the
657 clean bits if RAIDF_INITED is not set
658 mark things as clean... */
659
660 rf_update_component_labels(raidPtrs[unit],
661 RF_FINAL_COMPONENT_UPDATE);
662 if (doing_shutdown) {
663 /* last one, and we're going down, so
664 lights out for this RAID set too. */
665 error = rf_Shutdown(raidPtrs[unit]);
666
667 /* It's no longer initialized... */
668 rs->sc_flags &= ~RAIDF_INITED;
669
670 /* Detach the disk. */
671 disk_detach(&rs->sc_dkdev);
672 }
673 }
674
675 raidunlock(rs);
676 return (0);
677
678 }
679
680 void
681 raidstrategy(struct buf *bp)
682 {
683 int s;
684
685 unsigned int raidID = raidunit(bp->b_dev);
686 RF_Raid_t *raidPtr;
687 struct raid_softc *rs = &raid_softc[raidID];
688 int wlabel;
689
690 if ((rs->sc_flags & RAIDF_INITED) ==0) {
691 bp->b_error = ENXIO;
692 bp->b_flags |= B_ERROR;
693 bp->b_resid = bp->b_bcount;
694 biodone(bp);
695 return;
696 }
697 if (raidID >= numraid || !raidPtrs[raidID]) {
698 bp->b_error = ENODEV;
699 bp->b_flags |= B_ERROR;
700 bp->b_resid = bp->b_bcount;
701 biodone(bp);
702 return;
703 }
704 raidPtr = raidPtrs[raidID];
705 if (!raidPtr->valid) {
706 bp->b_error = ENODEV;
707 bp->b_flags |= B_ERROR;
708 bp->b_resid = bp->b_bcount;
709 biodone(bp);
710 return;
711 }
712 if (bp->b_bcount == 0) {
713 db1_printf(("b_bcount is zero..\n"));
714 biodone(bp);
715 return;
716 }
717
718 /*
719 * Do bounds checking and adjust transfer. If there's an
720 * error, the bounds check will flag that for us.
721 */
722
723 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
724 if (DISKPART(bp->b_dev) != RAW_PART)
725 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
726 db1_printf(("Bounds check failed!!:%d %d\n",
727 (int) bp->b_blkno, (int) wlabel));
728 biodone(bp);
729 return;
730 }
731 s = splbio();
732
733 bp->b_resid = 0;
734
735 /* stuff it onto our queue */
736 BUFQ_PUT(&rs->buf_queue, bp);
737
738 raidstart(raidPtrs[raidID]);
739
740 splx(s);
741 }
742 /* ARGSUSED */
743 int
744 raidread(dev_t dev, struct uio *uio, int flags)
745 {
746 int unit = raidunit(dev);
747 struct raid_softc *rs;
748
749 if (unit >= numraid)
750 return (ENXIO);
751 rs = &raid_softc[unit];
752
753 if ((rs->sc_flags & RAIDF_INITED) == 0)
754 return (ENXIO);
755
756 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
757
758 }
759 /* ARGSUSED */
760 int
761 raidwrite(dev_t dev, struct uio *uio, int flags)
762 {
763 int unit = raidunit(dev);
764 struct raid_softc *rs;
765
766 if (unit >= numraid)
767 return (ENXIO);
768 rs = &raid_softc[unit];
769
770 if ((rs->sc_flags & RAIDF_INITED) == 0)
771 return (ENXIO);
772
773 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
774
775 }
776
777 int
778 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
779 {
780 int unit = raidunit(dev);
781 int error = 0;
782 int part, pmask;
783 struct raid_softc *rs;
784 RF_Config_t *k_cfg, *u_cfg;
785 RF_Raid_t *raidPtr;
786 RF_RaidDisk_t *diskPtr;
787 RF_AccTotals_t *totals;
788 RF_DeviceConfig_t *d_cfg, **ucfgp;
789 u_char *specific_buf;
790 int retcode = 0;
791 int column;
792 int raidid;
793 struct rf_recon_req *rrcopy, *rr;
794 RF_ComponentLabel_t *clabel;
795 RF_ComponentLabel_t ci_label;
796 RF_ComponentLabel_t **clabel_ptr;
797 RF_SingleComponent_t *sparePtr,*componentPtr;
798 RF_SingleComponent_t hot_spare;
799 RF_SingleComponent_t component;
800 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
801 int i, j, d;
802 #ifdef __HAVE_OLD_DISKLABEL
803 struct disklabel newlabel;
804 #endif
805
806 if (unit >= numraid)
807 return (ENXIO);
808 rs = &raid_softc[unit];
809 raidPtr = raidPtrs[unit];
810
811 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
812 (int) DISKPART(dev), (int) unit, (int) cmd));
813
814 /* Must be open for writes for these commands... */
815 switch (cmd) {
816 case DIOCSDINFO:
817 case DIOCWDINFO:
818 #ifdef __HAVE_OLD_DISKLABEL
819 case ODIOCWDINFO:
820 case ODIOCSDINFO:
821 #endif
822 case DIOCWLABEL:
823 if ((flag & FWRITE) == 0)
824 return (EBADF);
825 }
826
827 /* Must be initialized for these... */
828 switch (cmd) {
829 case DIOCGDINFO:
830 case DIOCSDINFO:
831 case DIOCWDINFO:
832 #ifdef __HAVE_OLD_DISKLABEL
833 case ODIOCGDINFO:
834 case ODIOCWDINFO:
835 case ODIOCSDINFO:
836 case ODIOCGDEFLABEL:
837 #endif
838 case DIOCGPART:
839 case DIOCWLABEL:
840 case DIOCGDEFLABEL:
841 case RAIDFRAME_SHUTDOWN:
842 case RAIDFRAME_REWRITEPARITY:
843 case RAIDFRAME_GET_INFO:
844 case RAIDFRAME_RESET_ACCTOTALS:
845 case RAIDFRAME_GET_ACCTOTALS:
846 case RAIDFRAME_KEEP_ACCTOTALS:
847 case RAIDFRAME_GET_SIZE:
848 case RAIDFRAME_FAIL_DISK:
849 case RAIDFRAME_COPYBACK:
850 case RAIDFRAME_CHECK_RECON_STATUS:
851 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
852 case RAIDFRAME_GET_COMPONENT_LABEL:
853 case RAIDFRAME_SET_COMPONENT_LABEL:
854 case RAIDFRAME_ADD_HOT_SPARE:
855 case RAIDFRAME_REMOVE_HOT_SPARE:
856 case RAIDFRAME_INIT_LABELS:
857 case RAIDFRAME_REBUILD_IN_PLACE:
858 case RAIDFRAME_CHECK_PARITY:
859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
861 case RAIDFRAME_CHECK_COPYBACK_STATUS:
862 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
863 case RAIDFRAME_SET_AUTOCONFIG:
864 case RAIDFRAME_SET_ROOT:
865 case RAIDFRAME_DELETE_COMPONENT:
866 case RAIDFRAME_INCORPORATE_HOT_SPARE:
867 if ((rs->sc_flags & RAIDF_INITED) == 0)
868 return (ENXIO);
869 }
870
871 switch (cmd) {
872
873 /* configure the system */
874 case RAIDFRAME_CONFIGURE:
875
876 if (raidPtr->valid) {
877 /* There is a valid RAID set running on this unit! */
878 printf("raid%d: Device already configured!\n",unit);
879 return(EINVAL);
880 }
881
882 /* copy-in the configuration information */
883 /* data points to a pointer to the configuration structure */
884
885 u_cfg = *((RF_Config_t **) data);
886 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
887 if (k_cfg == NULL) {
888 return (ENOMEM);
889 }
890 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
891 if (retcode) {
892 RF_Free(k_cfg, sizeof(RF_Config_t));
893 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
894 retcode));
895 return (retcode);
896 }
897 /* allocate a buffer for the layout-specific data, and copy it
898 * in */
899 if (k_cfg->layoutSpecificSize) {
900 if (k_cfg->layoutSpecificSize > 10000) {
901 /* sanity check */
902 RF_Free(k_cfg, sizeof(RF_Config_t));
903 return (EINVAL);
904 }
905 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
906 (u_char *));
907 if (specific_buf == NULL) {
908 RF_Free(k_cfg, sizeof(RF_Config_t));
909 return (ENOMEM);
910 }
911 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
912 k_cfg->layoutSpecificSize);
913 if (retcode) {
914 RF_Free(k_cfg, sizeof(RF_Config_t));
915 RF_Free(specific_buf,
916 k_cfg->layoutSpecificSize);
917 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
918 retcode));
919 return (retcode);
920 }
921 } else
922 specific_buf = NULL;
923 k_cfg->layoutSpecific = specific_buf;
924
925 /* should do some kind of sanity check on the configuration.
926 * Store the sum of all the bytes in the last byte? */
927
928 /* configure the system */
929
930 /*
931 * Clear the entire RAID descriptor, just to make sure
932 * there is no stale data left in the case of a
933 * reconfiguration
934 */
935 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
936 raidPtr->raidid = unit;
937
938 retcode = rf_Configure(raidPtr, k_cfg, NULL);
939
940 if (retcode == 0) {
941
942 /* allow this many simultaneous IO's to
943 this RAID device */
944 raidPtr->openings = RAIDOUTSTANDING;
945
946 raidinit(raidPtr);
947 rf_markalldirty(raidPtr);
948 }
949 /* free the buffers. No return code here. */
950 if (k_cfg->layoutSpecificSize) {
951 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
952 }
953 RF_Free(k_cfg, sizeof(RF_Config_t));
954
955 return (retcode);
956
957 /* shutdown the system */
958 case RAIDFRAME_SHUTDOWN:
959
960 if ((error = raidlock(rs)) != 0)
961 return (error);
962
963 /*
964 * If somebody has a partition mounted, we shouldn't
965 * shutdown.
966 */
967
968 part = DISKPART(dev);
969 pmask = (1 << part);
970 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
971 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
972 (rs->sc_dkdev.dk_copenmask & pmask))) {
973 raidunlock(rs);
974 return (EBUSY);
975 }
976
977 retcode = rf_Shutdown(raidPtr);
978
979 /* It's no longer initialized... */
980 rs->sc_flags &= ~RAIDF_INITED;
981
982 /* Detach the disk. */
983 disk_detach(&rs->sc_dkdev);
984
985 raidunlock(rs);
986
987 return (retcode);
988 case RAIDFRAME_GET_COMPONENT_LABEL:
989 clabel_ptr = (RF_ComponentLabel_t **) data;
990 /* need to read the component label for the disk indicated
991 by row,column in clabel */
992
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
995 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
996 (RF_ComponentLabel_t *));
997 if (clabel == NULL)
998 return (ENOMEM);
999
1000 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1001
1002 retcode = copyin( *clabel_ptr, clabel,
1003 sizeof(RF_ComponentLabel_t));
1004
1005 if (retcode) {
1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1007 return(retcode);
1008 }
1009
1010 clabel->row = 0; /* Don't allow looking at anything else.*/
1011
1012 column = clabel->column;
1013
1014 if ((column < 0) || (column >= raidPtr->numCol +
1015 raidPtr->numSpare)) {
1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1017 return(EINVAL);
1018 }
1019
1020 raidread_component_label(raidPtr->Disks[column].dev,
1021 raidPtr->raid_cinfo[column].ci_vp,
1022 clabel );
1023
1024 retcode = copyout(clabel, *clabel_ptr,
1025 sizeof(RF_ComponentLabel_t));
1026 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1027 return (retcode);
1028
1029 case RAIDFRAME_SET_COMPONENT_LABEL:
1030 clabel = (RF_ComponentLabel_t *) data;
1031
1032 /* XXX check the label for valid stuff... */
1033 /* Note that some things *should not* get modified --
1034 the user should be re-initing the labels instead of
1035 trying to patch things.
1036 */
1037
1038 raidid = raidPtr->raidid;
1039 #if DEBUG
1040 printf("raid%d: Got component label:\n", raidid);
1041 printf("raid%d: Version: %d\n", raidid, clabel->version);
1042 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1043 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1044 printf("raid%d: Column: %d\n", raidid, clabel->column);
1045 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1046 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1047 printf("raid%d: Status: %d\n", raidid, clabel->status);
1048 #endif
1049 clabel->row = 0;
1050 column = clabel->column;
1051
1052 if ((column < 0) || (column >= raidPtr->numCol)) {
1053 return(EINVAL);
1054 }
1055
1056 /* XXX this isn't allowed to do anything for now :-) */
1057
1058 /* XXX and before it is, we need to fill in the rest
1059 of the fields!?!?!?! */
1060 #if 0
1061 raidwrite_component_label(
1062 raidPtr->Disks[column].dev,
1063 raidPtr->raid_cinfo[column].ci_vp,
1064 clabel );
1065 #endif
1066 return (0);
1067
1068 case RAIDFRAME_INIT_LABELS:
1069 clabel = (RF_ComponentLabel_t *) data;
1070 /*
1071 we only want the serial number from
1072 the above. We get all the rest of the information
1073 from the config that was used to create this RAID
1074 set.
1075 */
1076
1077 raidPtr->serial_number = clabel->serial_number;
1078
1079 raid_init_component_label(raidPtr, &ci_label);
1080 ci_label.serial_number = clabel->serial_number;
1081 ci_label.row = 0; /* we dont' pretend to support more */
1082
1083 for(column=0;column<raidPtr->numCol;column++) {
1084 diskPtr = &raidPtr->Disks[column];
1085 if (!RF_DEAD_DISK(diskPtr->status)) {
1086 ci_label.partitionSize = diskPtr->partitionSize;
1087 ci_label.column = column;
1088 raidwrite_component_label(
1089 raidPtr->Disks[column].dev,
1090 raidPtr->raid_cinfo[column].ci_vp,
1091 &ci_label );
1092 }
1093 }
1094
1095 return (retcode);
1096 case RAIDFRAME_SET_AUTOCONFIG:
1097 d = rf_set_autoconfig(raidPtr, *(int *) data);
1098 printf("raid%d: New autoconfig value is: %d\n",
1099 raidPtr->raidid, d);
1100 *(int *) data = d;
1101 return (retcode);
1102
1103 case RAIDFRAME_SET_ROOT:
1104 d = rf_set_rootpartition(raidPtr, *(int *) data);
1105 printf("raid%d: New rootpartition value is: %d\n",
1106 raidPtr->raidid, d);
1107 *(int *) data = d;
1108 return (retcode);
1109
1110 /* initialize all parity */
1111 case RAIDFRAME_REWRITEPARITY:
1112
1113 if (raidPtr->Layout.map->faultsTolerated == 0) {
1114 /* Parity for RAID 0 is trivially correct */
1115 raidPtr->parity_good = RF_RAID_CLEAN;
1116 return(0);
1117 }
1118
1119 if (raidPtr->parity_rewrite_in_progress == 1) {
1120 /* Re-write is already in progress! */
1121 return(EINVAL);
1122 }
1123
1124 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1125 rf_RewriteParityThread,
1126 raidPtr,"raid_parity");
1127 return (retcode);
1128
1129
1130 case RAIDFRAME_ADD_HOT_SPARE:
1131 sparePtr = (RF_SingleComponent_t *) data;
1132 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1133 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1134 return(retcode);
1135
1136 case RAIDFRAME_REMOVE_HOT_SPARE:
1137 return(retcode);
1138
1139 case RAIDFRAME_DELETE_COMPONENT:
1140 componentPtr = (RF_SingleComponent_t *)data;
1141 memcpy( &component, componentPtr,
1142 sizeof(RF_SingleComponent_t));
1143 retcode = rf_delete_component(raidPtr, &component);
1144 return(retcode);
1145
1146 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1147 componentPtr = (RF_SingleComponent_t *)data;
1148 memcpy( &component, componentPtr,
1149 sizeof(RF_SingleComponent_t));
1150 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1151 return(retcode);
1152
1153 case RAIDFRAME_REBUILD_IN_PLACE:
1154
1155 if (raidPtr->Layout.map->faultsTolerated == 0) {
1156 /* Can't do this on a RAID 0!! */
1157 return(EINVAL);
1158 }
1159
1160 if (raidPtr->recon_in_progress == 1) {
1161 /* a reconstruct is already in progress! */
1162 return(EINVAL);
1163 }
1164
1165 componentPtr = (RF_SingleComponent_t *) data;
1166 memcpy( &component, componentPtr,
1167 sizeof(RF_SingleComponent_t));
1168 component.row = 0; /* we don't support any more */
1169 column = component.column;
1170
1171 if ((column < 0) || (column >= raidPtr->numCol)) {
1172 return(EINVAL);
1173 }
1174
1175 RF_LOCK_MUTEX(raidPtr->mutex);
1176 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1177 (raidPtr->numFailures > 0)) {
1178 /* XXX 0 above shouldn't be constant!!! */
1179 /* some component other than this has failed.
1180 Let's not make things worse than they already
1181 are... */
1182 printf("raid%d: Unable to reconstruct to disk at:\n",
1183 raidPtr->raidid);
1184 printf("raid%d: Col: %d Too many failures.\n",
1185 raidPtr->raidid, column);
1186 RF_UNLOCK_MUTEX(raidPtr->mutex);
1187 return (EINVAL);
1188 }
1189 if (raidPtr->Disks[column].status ==
1190 rf_ds_reconstructing) {
1191 printf("raid%d: Unable to reconstruct to disk at:\n",
1192 raidPtr->raidid);
1193 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1194
1195 RF_UNLOCK_MUTEX(raidPtr->mutex);
1196 return (EINVAL);
1197 }
1198 if (raidPtr->Disks[column].status == rf_ds_spared) {
1199 RF_UNLOCK_MUTEX(raidPtr->mutex);
1200 return (EINVAL);
1201 }
1202 RF_UNLOCK_MUTEX(raidPtr->mutex);
1203
1204 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1205 if (rrcopy == NULL)
1206 return(ENOMEM);
1207
1208 rrcopy->raidPtr = (void *) raidPtr;
1209 rrcopy->col = column;
1210
1211 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1212 rf_ReconstructInPlaceThread,
1213 rrcopy,"raid_reconip");
1214 return(retcode);
1215
1216 case RAIDFRAME_GET_INFO:
1217 if (!raidPtr->valid)
1218 return (ENODEV);
1219 ucfgp = (RF_DeviceConfig_t **) data;
1220 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1221 (RF_DeviceConfig_t *));
1222 if (d_cfg == NULL)
1223 return (ENOMEM);
1224 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1225 d_cfg->rows = 1; /* there is only 1 row now */
1226 d_cfg->cols = raidPtr->numCol;
1227 d_cfg->ndevs = raidPtr->numCol;
1228 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1229 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1230 return (ENOMEM);
1231 }
1232 d_cfg->nspares = raidPtr->numSpare;
1233 if (d_cfg->nspares >= RF_MAX_DISKS) {
1234 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1235 return (ENOMEM);
1236 }
1237 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1238 d = 0;
1239 for (j = 0; j < d_cfg->cols; j++) {
1240 d_cfg->devs[d] = raidPtr->Disks[j];
1241 d++;
1242 }
1243 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1244 d_cfg->spares[i] = raidPtr->Disks[j];
1245 }
1246 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1247 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1248
1249 return (retcode);
1250
1251 case RAIDFRAME_CHECK_PARITY:
1252 *(int *) data = raidPtr->parity_good;
1253 return (0);
1254
1255 case RAIDFRAME_RESET_ACCTOTALS:
1256 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1257 return (0);
1258
1259 case RAIDFRAME_GET_ACCTOTALS:
1260 totals = (RF_AccTotals_t *) data;
1261 *totals = raidPtr->acc_totals;
1262 return (0);
1263
1264 case RAIDFRAME_KEEP_ACCTOTALS:
1265 raidPtr->keep_acc_totals = *(int *)data;
1266 return (0);
1267
1268 case RAIDFRAME_GET_SIZE:
1269 *(int *) data = raidPtr->totalSectors;
1270 return (0);
1271
1272 /* fail a disk & optionally start reconstruction */
1273 case RAIDFRAME_FAIL_DISK:
1274
1275 if (raidPtr->Layout.map->faultsTolerated == 0) {
1276 /* Can't do this on a RAID 0!! */
1277 return(EINVAL);
1278 }
1279
1280 rr = (struct rf_recon_req *) data;
1281 rr->row = 0;
1282 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1283 return (EINVAL);
1284
1285
1286 RF_LOCK_MUTEX(raidPtr->mutex);
1287 if ((raidPtr->Disks[rr->col].status ==
1288 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1289 /* some other component has failed. Let's not make
1290 things worse. XXX wrong for RAID6 */
1291 RF_UNLOCK_MUTEX(raidPtr->mutex);
1292 return (EINVAL);
1293 }
1294 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1295 /* Can't fail a spared disk! */
1296 RF_UNLOCK_MUTEX(raidPtr->mutex);
1297 return (EINVAL);
1298 }
1299 RF_UNLOCK_MUTEX(raidPtr->mutex);
1300
1301 /* make a copy of the recon request so that we don't rely on
1302 * the user's buffer */
1303 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1304 if (rrcopy == NULL)
1305 return(ENOMEM);
1306 memcpy(rrcopy, rr, sizeof(*rr));
1307 rrcopy->raidPtr = (void *) raidPtr;
1308
1309 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1310 rf_ReconThread,
1311 rrcopy,"raid_recon");
1312 return (0);
1313
1314 /* invoke a copyback operation after recon on whatever disk
1315 * needs it, if any */
1316 case RAIDFRAME_COPYBACK:
1317
1318 if (raidPtr->Layout.map->faultsTolerated == 0) {
1319 /* This makes no sense on a RAID 0!! */
1320 return(EINVAL);
1321 }
1322
1323 if (raidPtr->copyback_in_progress == 1) {
1324 /* Copyback is already in progress! */
1325 return(EINVAL);
1326 }
1327
1328 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1329 rf_CopybackThread,
1330 raidPtr,"raid_copyback");
1331 return (retcode);
1332
1333 /* return the percentage completion of reconstruction */
1334 case RAIDFRAME_CHECK_RECON_STATUS:
1335 if (raidPtr->Layout.map->faultsTolerated == 0) {
1336 /* This makes no sense on a RAID 0, so tell the
1337 user it's done. */
1338 *(int *) data = 100;
1339 return(0);
1340 }
1341 if (raidPtr->status != rf_rs_reconstructing)
1342 *(int *) data = 100;
1343 else {
1344 if (raidPtr->reconControl->numRUsTotal > 0) {
1345 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1346 } else {
1347 *(int *) data = 0;
1348 }
1349 }
1350 return (0);
1351 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1352 progressInfoPtr = (RF_ProgressInfo_t **) data;
1353 if (raidPtr->status != rf_rs_reconstructing) {
1354 progressInfo.remaining = 0;
1355 progressInfo.completed = 100;
1356 progressInfo.total = 100;
1357 } else {
1358 progressInfo.total =
1359 raidPtr->reconControl->numRUsTotal;
1360 progressInfo.completed =
1361 raidPtr->reconControl->numRUsComplete;
1362 progressInfo.remaining = progressInfo.total -
1363 progressInfo.completed;
1364 }
1365 retcode = copyout(&progressInfo, *progressInfoPtr,
1366 sizeof(RF_ProgressInfo_t));
1367 return (retcode);
1368
1369 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1370 if (raidPtr->Layout.map->faultsTolerated == 0) {
1371 /* This makes no sense on a RAID 0, so tell the
1372 user it's done. */
1373 *(int *) data = 100;
1374 return(0);
1375 }
1376 if (raidPtr->parity_rewrite_in_progress == 1) {
1377 *(int *) data = 100 *
1378 raidPtr->parity_rewrite_stripes_done /
1379 raidPtr->Layout.numStripe;
1380 } else {
1381 *(int *) data = 100;
1382 }
1383 return (0);
1384
1385 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1386 progressInfoPtr = (RF_ProgressInfo_t **) data;
1387 if (raidPtr->parity_rewrite_in_progress == 1) {
1388 progressInfo.total = raidPtr->Layout.numStripe;
1389 progressInfo.completed =
1390 raidPtr->parity_rewrite_stripes_done;
1391 progressInfo.remaining = progressInfo.total -
1392 progressInfo.completed;
1393 } else {
1394 progressInfo.remaining = 0;
1395 progressInfo.completed = 100;
1396 progressInfo.total = 100;
1397 }
1398 retcode = copyout(&progressInfo, *progressInfoPtr,
1399 sizeof(RF_ProgressInfo_t));
1400 return (retcode);
1401
1402 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1403 if (raidPtr->Layout.map->faultsTolerated == 0) {
1404 /* This makes no sense on a RAID 0 */
1405 *(int *) data = 100;
1406 return(0);
1407 }
1408 if (raidPtr->copyback_in_progress == 1) {
1409 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1410 raidPtr->Layout.numStripe;
1411 } else {
1412 *(int *) data = 100;
1413 }
1414 return (0);
1415
1416 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1417 progressInfoPtr = (RF_ProgressInfo_t **) data;
1418 if (raidPtr->copyback_in_progress == 1) {
1419 progressInfo.total = raidPtr->Layout.numStripe;
1420 progressInfo.completed =
1421 raidPtr->copyback_stripes_done;
1422 progressInfo.remaining = progressInfo.total -
1423 progressInfo.completed;
1424 } else {
1425 progressInfo.remaining = 0;
1426 progressInfo.completed = 100;
1427 progressInfo.total = 100;
1428 }
1429 retcode = copyout(&progressInfo, *progressInfoPtr,
1430 sizeof(RF_ProgressInfo_t));
1431 return (retcode);
1432
1433 /* the sparetable daemon calls this to wait for the kernel to
1434 * need a spare table. this ioctl does not return until a
1435 * spare table is needed. XXX -- calling mpsleep here in the
1436 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1437 * -- I should either compute the spare table in the kernel,
1438 * or have a different -- XXX XXX -- interface (a different
1439 * character device) for delivering the table -- XXX */
1440 #if 0
1441 case RAIDFRAME_SPARET_WAIT:
1442 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1443 while (!rf_sparet_wait_queue)
1444 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1445 waitreq = rf_sparet_wait_queue;
1446 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1447 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1448
1449 /* structure assignment */
1450 *((RF_SparetWait_t *) data) = *waitreq;
1451
1452 RF_Free(waitreq, sizeof(*waitreq));
1453 return (0);
1454
1455 /* wakes up a process waiting on SPARET_WAIT and puts an error
1456 * code in it that will cause the dameon to exit */
1457 case RAIDFRAME_ABORT_SPARET_WAIT:
1458 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1459 waitreq->fcol = -1;
1460 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1461 waitreq->next = rf_sparet_wait_queue;
1462 rf_sparet_wait_queue = waitreq;
1463 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1464 wakeup(&rf_sparet_wait_queue);
1465 return (0);
1466
1467 /* used by the spare table daemon to deliver a spare table
1468 * into the kernel */
1469 case RAIDFRAME_SEND_SPARET:
1470
1471 /* install the spare table */
1472 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1473
1474 /* respond to the requestor. the return status of the spare
1475 * table installation is passed in the "fcol" field */
1476 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1477 waitreq->fcol = retcode;
1478 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1479 waitreq->next = rf_sparet_resp_queue;
1480 rf_sparet_resp_queue = waitreq;
1481 wakeup(&rf_sparet_resp_queue);
1482 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1483
1484 return (retcode);
1485 #endif
1486
1487 default:
1488 break; /* fall through to the os-specific code below */
1489
1490 }
1491
1492 if (!raidPtr->valid)
1493 return (EINVAL);
1494
1495 /*
1496 * Add support for "regular" device ioctls here.
1497 */
1498
1499 switch (cmd) {
1500 case DIOCGDINFO:
1501 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1502 break;
1503 #ifdef __HAVE_OLD_DISKLABEL
1504 case ODIOCGDINFO:
1505 newlabel = *(rs->sc_dkdev.dk_label);
1506 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1507 return ENOTTY;
1508 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1509 break;
1510 #endif
1511
1512 case DIOCGPART:
1513 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1514 ((struct partinfo *) data)->part =
1515 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1516 break;
1517
1518 case DIOCWDINFO:
1519 case DIOCSDINFO:
1520 #ifdef __HAVE_OLD_DISKLABEL
1521 case ODIOCWDINFO:
1522 case ODIOCSDINFO:
1523 #endif
1524 {
1525 struct disklabel *lp;
1526 #ifdef __HAVE_OLD_DISKLABEL
1527 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1528 memset(&newlabel, 0, sizeof newlabel);
1529 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1530 lp = &newlabel;
1531 } else
1532 #endif
1533 lp = (struct disklabel *)data;
1534
1535 if ((error = raidlock(rs)) != 0)
1536 return (error);
1537
1538 rs->sc_flags |= RAIDF_LABELLING;
1539
1540 error = setdisklabel(rs->sc_dkdev.dk_label,
1541 lp, 0, rs->sc_dkdev.dk_cpulabel);
1542 if (error == 0) {
1543 if (cmd == DIOCWDINFO
1544 #ifdef __HAVE_OLD_DISKLABEL
1545 || cmd == ODIOCWDINFO
1546 #endif
1547 )
1548 error = writedisklabel(RAIDLABELDEV(dev),
1549 raidstrategy, rs->sc_dkdev.dk_label,
1550 rs->sc_dkdev.dk_cpulabel);
1551 }
1552 rs->sc_flags &= ~RAIDF_LABELLING;
1553
1554 raidunlock(rs);
1555
1556 if (error)
1557 return (error);
1558 break;
1559 }
1560
1561 case DIOCWLABEL:
1562 if (*(int *) data != 0)
1563 rs->sc_flags |= RAIDF_WLABEL;
1564 else
1565 rs->sc_flags &= ~RAIDF_WLABEL;
1566 break;
1567
1568 case DIOCGDEFLABEL:
1569 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1570 break;
1571
1572 #ifdef __HAVE_OLD_DISKLABEL
1573 case ODIOCGDEFLABEL:
1574 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1575 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1576 return ENOTTY;
1577 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1578 break;
1579 #endif
1580
1581 default:
1582 retcode = ENOTTY;
1583 }
1584 return (retcode);
1585
1586 }
1587
1588
1589 /* raidinit -- complete the rest of the initialization for the
1590 RAIDframe device. */
1591
1592
1593 static void
1594 raidinit(RF_Raid_t *raidPtr)
1595 {
1596 struct raid_softc *rs;
1597 int unit;
1598
1599 unit = raidPtr->raidid;
1600
1601 rs = &raid_softc[unit];
1602
1603 /* XXX should check return code first... */
1604 rs->sc_flags |= RAIDF_INITED;
1605
1606 /* XXX doesn't check bounds. */
1607 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1608
1609 rs->sc_dkdev.dk_name = rs->sc_xname;
1610
1611 /* disk_attach actually creates space for the CPU disklabel, among
1612 * other things, so it's critical to call this *BEFORE* we try putzing
1613 * with disklabels. */
1614
1615 disk_attach(&rs->sc_dkdev);
1616
1617 /* XXX There may be a weird interaction here between this, and
1618 * protectedSectors, as used in RAIDframe. */
1619
1620 rs->sc_size = raidPtr->totalSectors;
1621 }
1622 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1623 /* wake up the daemon & tell it to get us a spare table
1624 * XXX
1625 * the entries in the queues should be tagged with the raidPtr
1626 * so that in the extremely rare case that two recons happen at once,
1627 * we know for which device were requesting a spare table
1628 * XXX
1629 *
1630 * XXX This code is not currently used. GO
1631 */
/*
 * Hand a spare-table request to the user-level daemon and sleep until
 * the daemon posts a response on rf_sparet_resp_queue.
 *
 * req is queued on rf_sparet_wait_queue (ownership passes to the
 * consumer of that queue); the response entry popped from
 * rf_sparet_resp_queue is a *different* allocation, freed here.
 * Returns the "fcol" status field from the daemon's response.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	/* Push the request and poke any daemon blocked in SPARET_WAIT. */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): an older comment here claimed "mpsleep unlocks
	 * the mutex", but this code calls tsleep(), which does NOT drop
	 * rf_sparet_wait_mutex while sleeping -- confirm this cannot
	 * deadlock against the path that fills rf_sparet_resp_queue.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1656 #endif
1657
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the lock: rf_update_component_labels() does I/O */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/*
	 * Check to see if we're at the limit...
	 * Loop invariant: raidPtr->mutex is held at the top of each
	 * iteration and on loop exit; every continue path re-acquires it.
	 */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
			/* nothing more to do (mutex intentionally not held) */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/*
		 * Reject requests that run off the end of the array; the
		 * "sum < ..." comparisons catch arithmetic wrap-around.
		 */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a multiple of the sector size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* claim one of the available openings */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* submission failed immediately: complete the buf
			 * with the error and keep pulling from the queue */
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1789
1790
1791
1792
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry */

/*
 * Translate a RAIDframe disk-queue request into a struct buf and hand it
 * to the component's strategy routine.  Returns 0 on dispatch, ENOMEM if
 * a cluster buf can't be allocated.  Completion is delivered through
 * KernelWakeupFunc() via the buf's b_iodone hook.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;

	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state before reusing the buf */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT);
	if (raidbp == NULL) {
		/* no memory for the shadow buf; fail the original request */
		bp->b_flags |= B_ERROR;
		bp->b_error = ENOMEM;
		return (ENOMEM);
	}
	BUF_INIT(&raidbp->rf_buf);

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	BIO_COPYPRIO(&raidbp->rf_buf, bp);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): double parens -- this was probably meant to
		 * be db1_printf((...)); as written it still compiles since
		 * ("...") is just a parenthesized expression. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately via the normal completion path */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up the shadow buf to target the component device */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		       op | bp->b_flags, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
 */
/*
 * Completion handler for bufs dispatched by rf_DispatchKernelIO():
 * propagate error/residual state from the shadow buf back to the
 * original buf, mark the component failed on I/O error, release the
 * shadow buf, and queue the request on raidPtr->iodone for the raidio
 * thread to finish.
 */
static void
KernelWakeupFunc(struct buf *vbp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	int s;

	/* block disk interrupts while we unwind the completion */
	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* copy error state from the shadow buf to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif
#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		/* account the elapsed disk time to the access trace */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* the shadow buf is no longer needed */
	pool_put(&rf_pools.cbuf, raidbp);

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
1983
1984
1985
1986 /*
1987 * initialize a buf structure for doing an I/O in the kernel.
1988 */
1989 static void
1990 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1991 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf,
1992 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1993 struct proc *b_proc)
1994 {
1995 /* bp->b_flags = B_PHYS | rw_flag; */
1996 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1997 bp->b_bcount = numSect << logBytesPerSector;
1998 bp->b_bufsize = bp->b_bcount;
1999 bp->b_error = 0;
2000 bp->b_dev = dev;
2001 bp->b_data = buf;
2002 bp->b_blkno = startSect;
2003 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2004 if (bp->b_bcount == 0) {
2005 panic("bp->b_bcount is zero in InitBP!!");
2006 }
2007 bp->b_proc = b_proc;
2008 bp->b_iodone = cbFunc;
2009 bp->b_vp = b_vp;
2010
2011 }
2012
2013 static void
2014 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2015 struct disklabel *lp)
2016 {
2017 memset(lp, 0, sizeof(*lp));
2018
2019 /* fabricate a label... */
2020 lp->d_secperunit = raidPtr->totalSectors;
2021 lp->d_secsize = raidPtr->bytesPerSector;
2022 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2023 lp->d_ntracks = 4 * raidPtr->numCol;
2024 lp->d_ncylinders = raidPtr->totalSectors /
2025 (lp->d_nsectors * lp->d_ntracks);
2026 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2027
2028 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2029 lp->d_type = DTYPE_RAID;
2030 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2031 lp->d_rpm = 3600;
2032 lp->d_interleave = 1;
2033 lp->d_flags = 0;
2034
2035 lp->d_partitions[RAW_PART].p_offset = 0;
2036 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2037 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2038 lp->d_npartitions = RAW_PART + 1;
2039
2040 lp->d_magic = DISKMAGIC;
2041 lp->d_magic2 = DISKMAGIC;
2042 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2043
2044 }
/*
 * Read the disklabel from the raid device. If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default in case no label is on disk */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no usable on-disk label; synthesize one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and an old disklabel may be used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* warn (but don't reject) partitions extending past the end */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2101 /*
2102 * Take care of things one might want to take care of in the event
2103 * that a disklabel isn't present.
2104 */
2105 static void
2106 raidmakedisklabel(struct raid_softc *rs)
2107 {
2108 struct disklabel *lp = rs->sc_dkdev.dk_label;
2109 db1_printf(("Making a label..\n"));
2110
2111 /*
2112 * For historical reasons, if there's no disklabel present
2113 * the raw partition must be marked FS_BSDFFS.
2114 */
2115
2116 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2117
2118 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2119
2120 lp->d_checksum = dkcksum(lp);
2121 }
2122 /*
2123 * Lookup the provided name in the filesystem. If the file exists,
2124 * is a valid block device, and isn't being used by anyone else,
2125 * set *vpp to the file's vnode.
2126 * You'll find the original of this in ccd.c
2127 */
2128 int
2129 raidlookup(char *path, struct proc *p, struct vnode **vpp)
2130 {
2131 struct nameidata nd;
2132 struct vnode *vp;
2133 struct vattr va;
2134 int error;
2135
2136 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2137 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2138 return (error);
2139 }
2140 vp = nd.ni_vp;
2141 if (vp->v_usecount > 1) {
2142 VOP_UNLOCK(vp, 0);
2143 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2144 return (EBUSY);
2145 }
2146 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2147 VOP_UNLOCK(vp, 0);
2148 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2149 return (error);
2150 }
2151 /* XXX: eventually we should handle VREG, too. */
2152 if (va.va_type != VBLK) {
2153 VOP_UNLOCK(vp, 0);
2154 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2155 return (ENOTBLK);
2156 }
2157 VOP_UNLOCK(vp, 0);
2158 *vpp = vp;
2159 return (0);
2160 }
2161 /*
2162 * Wait interruptibly for an exclusive lock.
2163 *
2164 * XXX
2165 * Several drivers do this; it should be abstracted and made MP-safe.
2166 * (Hmm... where have we seen this warning before :-> GO )
2167 */
2168 static int
2169 raidlock(struct raid_softc *rs)
2170 {
2171 int error;
2172
2173 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2174 rs->sc_flags |= RAIDF_WANTED;
2175 if ((error =
2176 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2177 return (error);
2178 }
2179 rs->sc_flags |= RAIDF_LOCKED;
2180 return (0);
2181 }
2182 /*
2183 * Unlock and wake up any waiters.
2184 */
2185 static void
2186 raidunlock(struct raid_softc *rs)
2187 {
2188
2189 rs->sc_flags &= ~RAIDF_LOCKED;
2190 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2191 rs->sc_flags &= ~RAIDF_WANTED;
2192 wakeup(rs);
2193 }
2194 }
2195
2196
2197 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2198 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2199
2200 int
2201 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2202 {
2203 RF_ComponentLabel_t clabel;
2204 raidread_component_label(dev, b_vp, &clabel);
2205 clabel.mod_counter = mod_counter;
2206 clabel.clean = RF_RAID_CLEAN;
2207 raidwrite_component_label(dev, b_vp, &clabel);
2208 return(0);
2209 }
2210
2211
2212 int
2213 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2214 {
2215 RF_ComponentLabel_t clabel;
2216 raidread_component_label(dev, b_vp, &clabel);
2217 clabel.mod_counter = mod_counter;
2218 clabel.clean = RF_RAID_DIRTY;
2219 raidwrite_component_label(dev, b_vp, &clabel);
2220 return(0);
2221 }
2222
2223 /* ARGSUSED */
2224 int
2225 raidread_component_label(dev_t dev, struct vnode *b_vp,
2226 RF_ComponentLabel_t *clabel)
2227 {
2228 struct buf *bp;
2229 const struct bdevsw *bdev;
2230 int error;
2231
2232 /* XXX should probably ensure that we don't try to do this if
2233 someone has changed rf_protected_sectors. */
2234
2235 if (b_vp == NULL) {
2236 /* For whatever reason, this component is not valid.
2237 Don't try to read a component label from it. */
2238 return(EINVAL);
2239 }
2240
2241 /* get a block of the appropriate size... */
2242 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2243 bp->b_dev = dev;
2244
2245 /* get our ducks in a row for the read */
2246 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2247 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2248 bp->b_flags |= B_READ;
2249 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2250
2251 bdev = bdevsw_lookup(bp->b_dev);
2252 if (bdev == NULL)
2253 return (ENXIO);
2254 (*bdev->d_strategy)(bp);
2255
2256 error = biowait(bp);
2257
2258 if (!error) {
2259 memcpy(clabel, bp->b_data,
2260 sizeof(RF_ComponentLabel_t));
2261 }
2262
2263 brelse(bp);
2264 return(error);
2265 }
2266 /* ARGSUSED */
2267 int
2268 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2269 RF_ComponentLabel_t *clabel)
2270 {
2271 struct buf *bp;
2272 const struct bdevsw *bdev;
2273 int error;
2274
2275 /* get a block of the appropriate size... */
2276 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2277 bp->b_dev = dev;
2278
2279 /* get our ducks in a row for the write */
2280 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2281 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2282 bp->b_flags |= B_WRITE;
2283 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2284
2285 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2286
2287 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2288
2289 bdev = bdevsw_lookup(bp->b_dev);
2290 if (bdev == NULL)
2291 return (ENXIO);
2292 (*bdev->d_strategy)(bp);
2293 error = biowait(bp);
2294 brelse(bp);
2295 if (error) {
2296 #if 1
2297 printf("Failed to write RAID component info!\n");
2298 #endif
2299 }
2300
2301 return(error);
2302 }
2303
/*
 * Bump the set's modification counter and mark the component label of
 * every live component (and every in-use spare) as dirty.  Called when
 * the set goes into service, so an unclean shutdown is detectable.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				/* raidmarkdirty() re-reads the label
				   itself and writes it back dirty with
				   the new mod_counter. */
				raidmarkdirty(
					raidPtr->Disks[c].dev,
					raidPtr->raid_cinfo[c].ci_vp,
					raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			/* NOTE(review): the edits to the local `clabel` above
			   are never written out here — raidmarkdirty() re-reads
			   the on-disk label and only flips clean/mod_counter.
			   Looks like dead code; confirm intent before removing. */
			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2374
2375
/*
 * Rewrite the component labels of all optimal components and in-use
 * spares with a freshly bumped modification counter.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark each
 * label clean (the normal shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* On final update with good parity, flag the
			   component clean as well. */
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[c].dev,
						      raidPtr->raid_cinfo[c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* Rebuild the label from scratch, then record the
			   column the spare now serves and mark it optimal. */
			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2466
2467 void
2468 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2469 {
2470 struct proc *p;
2471
2472 p = raidPtr->engine_thread;
2473
2474 if (vp != NULL) {
2475 if (auto_configured == 1) {
2476 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2477 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2478 vput(vp);
2479
2480 } else {
2481 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2482 }
2483 }
2484 }
2485
2486
2487 void
2488 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2489 {
2490 int r,c;
2491 struct vnode *vp;
2492 int acd;
2493
2494
2495 /* We take this opportunity to close the vnodes like we should.. */
2496
2497 for (c = 0; c < raidPtr->numCol; c++) {
2498 vp = raidPtr->raid_cinfo[c].ci_vp;
2499 acd = raidPtr->Disks[c].auto_configured;
2500 rf_close_component(raidPtr, vp, acd);
2501 raidPtr->raid_cinfo[c].ci_vp = NULL;
2502 raidPtr->Disks[c].auto_configured = 0;
2503 }
2504
2505 for (r = 0; r < raidPtr->numSpare; r++) {
2506 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2507 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2508 rf_close_component(raidPtr, vp, acd);
2509 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2510 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2511 }
2512 }
2513
2514
2515 void
2516 rf_ReconThread(struct rf_recon_req *req)
2517 {
2518 int s;
2519 RF_Raid_t *raidPtr;
2520
2521 s = splbio();
2522 raidPtr = (RF_Raid_t *) req->raidPtr;
2523 raidPtr->recon_in_progress = 1;
2524
2525 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2526 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2527
2528 RF_Free(req, sizeof(*req));
2529
2530 raidPtr->recon_in_progress = 0;
2531 splx(s);
2532
2533 /* That's all... */
2534 kthread_exit(0); /* does not return */
2535 }
2536
2537 void
2538 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2539 {
2540 int retcode;
2541 int s;
2542
2543 raidPtr->parity_rewrite_in_progress = 1;
2544 s = splbio();
2545 retcode = rf_RewriteParity(raidPtr);
2546 splx(s);
2547 if (retcode) {
2548 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2549 } else {
2550 /* set the clean bit! If we shutdown correctly,
2551 the clean bit on each component label will get
2552 set */
2553 raidPtr->parity_good = RF_RAID_CLEAN;
2554 }
2555 raidPtr->parity_rewrite_in_progress = 0;
2556
2557 /* Anyone waiting for us to stop? If so, inform them... */
2558 if (raidPtr->waitShutdown) {
2559 wakeup(&raidPtr->parity_rewrite_in_progress);
2560 }
2561
2562 /* That's all... */
2563 kthread_exit(0); /* does not return */
2564 }
2565
2566
2567 void
2568 rf_CopybackThread(RF_Raid_t *raidPtr)
2569 {
2570 int s;
2571
2572 raidPtr->copyback_in_progress = 1;
2573 s = splbio();
2574 rf_CopybackReconstructedData(raidPtr);
2575 splx(s);
2576 raidPtr->copyback_in_progress = 0;
2577
2578 /* That's all... */
2579 kthread_exit(0); /* does not return */
2580 }
2581
2582
2583 void
2584 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2585 {
2586 int s;
2587 RF_Raid_t *raidPtr;
2588
2589 s = splbio();
2590 raidPtr = req->raidPtr;
2591 raidPtr->recon_in_progress = 1;
2592 rf_ReconstructInPlace(raidPtr, req->col);
2593 RF_Free(req, sizeof(*req));
2594 raidPtr->recon_in_progress = 0;
2595 splx(s);
2596
2597 /* That's all... */
2598 kthread_exit(0); /* does not return */
2599 }
2600
2601 RF_AutoConfig_t *
2602 rf_find_raid_components()
2603 {
2604 struct vnode *vp;
2605 struct disklabel label;
2606 struct device *dv;
2607 dev_t dev;
2608 int bmajor;
2609 int error;
2610 int i;
2611 int good_one;
2612 RF_ComponentLabel_t *clabel;
2613 RF_AutoConfig_t *ac_list;
2614 RF_AutoConfig_t *ac;
2615
2616
2617 /* initialize the AutoConfig list */
2618 ac_list = NULL;
2619
2620 /* we begin by trolling through *all* the devices on the system */
2621
2622 for (dv = alldevs.tqh_first; dv != NULL;
2623 dv = dv->dv_list.tqe_next) {
2624
2625 /* we are only interested in disks... */
2626 if (dv->dv_class != DV_DISK)
2627 continue;
2628
2629 /* we don't care about floppies... */
2630 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2631 continue;
2632 }
2633
2634 /* we don't care about CD's... */
2635 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2636 continue;
2637 }
2638
2639 /* hdfd is the Atari/Hades floppy driver */
2640 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2641 continue;
2642 }
2643 /* fdisa is the Atari/Milan floppy driver */
2644 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2645 continue;
2646 }
2647
2648 /* need to find the device_name_to_block_device_major stuff */
2649 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2650
2651 /* get a vnode for the raw partition of this disk */
2652
2653 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2654 if (bdevvp(dev, &vp))
2655 panic("RAID can't alloc vnode");
2656
2657 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2658
2659 if (error) {
2660 /* "Who cares." Continue looking
2661 for something that exists*/
2662 vput(vp);
2663 continue;
2664 }
2665
2666 /* Ok, the disk exists. Go get the disklabel. */
2667 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2668 if (error) {
2669 /*
2670 * XXX can't happen - open() would
2671 * have errored out (or faked up one)
2672 */
2673 printf("can't get label for dev %s%c (%d)!?!?\n",
2674 dv->dv_xname, 'a' + RAW_PART, error);
2675 }
2676
2677 /* don't need this any more. We'll allocate it again
2678 a little later if we really do... */
2679 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2680 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2681 vput(vp);
2682
2683 for (i=0; i < label.d_npartitions; i++) {
2684 /* We only support partitions marked as RAID */
2685 if (label.d_partitions[i].p_fstype != FS_RAID)
2686 continue;
2687
2688 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2689 if (bdevvp(dev, &vp))
2690 panic("RAID can't alloc vnode");
2691
2692 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2693 if (error) {
2694 /* Whatever... */
2695 vput(vp);
2696 continue;
2697 }
2698
2699 good_one = 0;
2700
2701 clabel = (RF_ComponentLabel_t *)
2702 malloc(sizeof(RF_ComponentLabel_t),
2703 M_RAIDFRAME, M_NOWAIT);
2704 if (clabel == NULL) {
2705 /* XXX CLEANUP HERE */
2706 printf("RAID auto config: out of memory!\n");
2707 return(NULL); /* XXX probably should panic? */
2708 }
2709
2710 if (!raidread_component_label(dev, vp, clabel)) {
2711 /* Got the label. Does it look reasonable? */
2712 if (rf_reasonable_label(clabel) &&
2713 (clabel->partitionSize <=
2714 label.d_partitions[i].p_size)) {
2715 #if DEBUG
2716 printf("Component on: %s%c: %d\n",
2717 dv->dv_xname, 'a'+i,
2718 label.d_partitions[i].p_size);
2719 rf_print_component_label(clabel);
2720 #endif
2721 /* if it's reasonable, add it,
2722 else ignore it. */
2723 ac = (RF_AutoConfig_t *)
2724 malloc(sizeof(RF_AutoConfig_t),
2725 M_RAIDFRAME,
2726 M_NOWAIT);
2727 if (ac == NULL) {
2728 /* XXX should panic?? */
2729 return(NULL);
2730 }
2731
2732 snprintf(ac->devname,
2733 sizeof(ac->devname), "%s%c",
2734 dv->dv_xname, 'a'+i);
2735 ac->dev = dev;
2736 ac->vp = vp;
2737 ac->clabel = clabel;
2738 ac->next = ac_list;
2739 ac_list = ac;
2740 good_one = 1;
2741 }
2742 }
2743 if (!good_one) {
2744 /* cleanup */
2745 free(clabel, M_RAIDFRAME);
2746 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2747 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2748 vput(vp);
2749 }
2750 }
2751 }
2752 return(ac_list);
2753 }
2754
2755 static int
2756 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2757 {
2758
2759 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2760 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2761 ((clabel->clean == RF_RAID_CLEAN) ||
2762 (clabel->clean == RF_RAID_DIRTY)) &&
2763 clabel->row >=0 &&
2764 clabel->column >= 0 &&
2765 clabel->num_rows > 0 &&
2766 clabel->num_columns > 0 &&
2767 clabel->row < clabel->num_rows &&
2768 clabel->column < clabel->num_columns &&
2769 clabel->blockSize > 0 &&
2770 clabel->numBlocks > 0) {
2771 /* label looks reasonable enough... */
2772 return(1);
2773 }
2774 return(0);
2775 }
2776
2777
#if DEBUG
/*
 * Debug helper: dump the interesting fields of a component label to
 * the console.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2805
2806 RF_ConfigSet_t *
2807 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2808 {
2809 RF_AutoConfig_t *ac;
2810 RF_ConfigSet_t *config_sets;
2811 RF_ConfigSet_t *cset;
2812 RF_AutoConfig_t *ac_next;
2813
2814
2815 config_sets = NULL;
2816
2817 /* Go through the AutoConfig list, and figure out which components
2818 belong to what sets. */
2819 ac = ac_list;
2820 while(ac!=NULL) {
2821 /* we're going to putz with ac->next, so save it here
2822 for use at the end of the loop */
2823 ac_next = ac->next;
2824
2825 if (config_sets == NULL) {
2826 /* will need at least this one... */
2827 config_sets = (RF_ConfigSet_t *)
2828 malloc(sizeof(RF_ConfigSet_t),
2829 M_RAIDFRAME, M_NOWAIT);
2830 if (config_sets == NULL) {
2831 panic("rf_create_auto_sets: No memory!");
2832 }
2833 /* this one is easy :) */
2834 config_sets->ac = ac;
2835 config_sets->next = NULL;
2836 config_sets->rootable = 0;
2837 ac->next = NULL;
2838 } else {
2839 /* which set does this component fit into? */
2840 cset = config_sets;
2841 while(cset!=NULL) {
2842 if (rf_does_it_fit(cset, ac)) {
2843 /* looks like it matches... */
2844 ac->next = cset->ac;
2845 cset->ac = ac;
2846 break;
2847 }
2848 cset = cset->next;
2849 }
2850 if (cset==NULL) {
2851 /* didn't find a match above... new set..*/
2852 cset = (RF_ConfigSet_t *)
2853 malloc(sizeof(RF_ConfigSet_t),
2854 M_RAIDFRAME, M_NOWAIT);
2855 if (cset == NULL) {
2856 panic("rf_create_auto_sets: No memory!");
2857 }
2858 cset->ac = ac;
2859 ac->next = NULL;
2860 cset->next = config_sets;
2861 cset->rootable = 0;
2862 config_sets = cset;
2863 }
2864 }
2865 ac = ac_next;
2866 }
2867
2868
2869 return(config_sets);
2870 }
2871
2872 static int
2873 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2874 {
2875 RF_ComponentLabel_t *clabel1, *clabel2;
2876
2877 /* If this one matches the *first* one in the set, that's good
2878 enough, since the other members of the set would have been
2879 through here too... */
2880 /* note that we are not checking partitionSize here..
2881
2882 Note that we are also not checking the mod_counters here.
2883 If everything else matches execpt the mod_counter, that's
2884 good enough for this test. We will deal with the mod_counters
2885 a little later in the autoconfiguration process.
2886
2887 (clabel1->mod_counter == clabel2->mod_counter) &&
2888
2889 The reason we don't check for this is that failed disks
2890 will have lower modification counts. If those disks are
2891 not added to the set they used to belong to, then they will
2892 form their own set, which may result in 2 different sets,
2893 for example, competing to be configured at raid0, and
2894 perhaps competing to be the root filesystem set. If the
2895 wrong ones get configured, or both attempt to become /,
2896 weird behaviour and or serious lossage will occur. Thus we
2897 need to bring them into the fold here, and kick them out at
2898 a later point.
2899
2900 */
2901
2902 clabel1 = cset->ac->clabel;
2903 clabel2 = ac->clabel;
2904 if ((clabel1->version == clabel2->version) &&
2905 (clabel1->serial_number == clabel2->serial_number) &&
2906 (clabel1->num_rows == clabel2->num_rows) &&
2907 (clabel1->num_columns == clabel2->num_columns) &&
2908 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2909 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2910 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2911 (clabel1->parityConfig == clabel2->parityConfig) &&
2912 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2913 (clabel1->blockSize == clabel2->blockSize) &&
2914 (clabel1->numBlocks == clabel2->numBlocks) &&
2915 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2916 (clabel1->root_partition == clabel2->root_partition) &&
2917 (clabel1->last_unit == clabel2->last_unit) &&
2918 (clabel1->config_order == clabel2->config_order)) {
2919 /* if it get's here, it almost *has* to be a match */
2920 } else {
2921 /* it's not consistent with somebody in the set..
2922 punt */
2923 return(0);
2924 }
2925 /* all was fine.. it must fit... */
2926 return(1);
2927 }
2928
/*
 * Decide whether a config set has enough live components to be
 * configured.  Returns 1 if so, 0 if too many components are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* The authoritative mod_counter is the highest one present;
	   components with a lower count are stale (e.g. failed earlier). */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* For each expected column, look for a component with a current
	   mod_counter.  RAID 1 gets special even/odd-pair accounting:
	   losing both members of a mirror pair is fatal, losing one of
	   each pair is survivable. */
	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3031
3032 void
3033 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3034 RF_Raid_t *raidPtr)
3035 {
3036 RF_ComponentLabel_t *clabel;
3037 int i;
3038
3039 clabel = ac->clabel;
3040
3041 /* 1. Fill in the common stuff */
3042 config->numRow = clabel->num_rows = 1;
3043 config->numCol = clabel->num_columns;
3044 config->numSpare = 0; /* XXX should this be set here? */
3045 config->sectPerSU = clabel->sectPerSU;
3046 config->SUsPerPU = clabel->SUsPerPU;
3047 config->SUsPerRU = clabel->SUsPerRU;
3048 config->parityConfig = clabel->parityConfig;
3049 /* XXX... */
3050 strcpy(config->diskQueueType,"fifo");
3051 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3052 config->layoutSpecificSize = 0; /* XXX ?? */
3053
3054 while(ac!=NULL) {
3055 /* row/col values will be in range due to the checks
3056 in reasonable_label() */
3057 strcpy(config->devnames[0][ac->clabel->column],
3058 ac->devname);
3059 ac = ac->next;
3060 }
3061
3062 for(i=0;i<RF_MAXDBGV;i++) {
3063 config->debugVars[i][0] = 0;
3064 }
3065 }
3066
3067 int
3068 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3069 {
3070 RF_ComponentLabel_t clabel;
3071 struct vnode *vp;
3072 dev_t dev;
3073 int column;
3074 int sparecol;
3075
3076 raidPtr->autoconfigure = new_value;
3077
3078 for(column=0; column<raidPtr->numCol; column++) {
3079 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3080 dev = raidPtr->Disks[column].dev;
3081 vp = raidPtr->raid_cinfo[column].ci_vp;
3082 raidread_component_label(dev, vp, &clabel);
3083 clabel.autoconfigure = new_value;
3084 raidwrite_component_label(dev, vp, &clabel);
3085 }
3086 }
3087 for(column = 0; column < raidPtr->numSpare ; column++) {
3088 sparecol = raidPtr->numCol + column;
3089 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3090 dev = raidPtr->Disks[sparecol].dev;
3091 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3092 raidread_component_label(dev, vp, &clabel);
3093 clabel.autoconfigure = new_value;
3094 raidwrite_component_label(dev, vp, &clabel);
3095 }
3096 }
3097 return(new_value);
3098 }
3099
3100 int
3101 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3102 {
3103 RF_ComponentLabel_t clabel;
3104 struct vnode *vp;
3105 dev_t dev;
3106 int column;
3107 int sparecol;
3108
3109 raidPtr->root_partition = new_value;
3110 for(column=0; column<raidPtr->numCol; column++) {
3111 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3112 dev = raidPtr->Disks[column].dev;
3113 vp = raidPtr->raid_cinfo[column].ci_vp;
3114 raidread_component_label(dev, vp, &clabel);
3115 clabel.root_partition = new_value;
3116 raidwrite_component_label(dev, vp, &clabel);
3117 }
3118 }
3119 for(column = 0; column < raidPtr->numSpare ; column++) {
3120 sparecol = raidPtr->numCol + column;
3121 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3122 dev = raidPtr->Disks[sparecol].dev;
3123 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3124 raidread_component_label(dev, vp, &clabel);
3125 clabel.root_partition = new_value;
3126 raidwrite_component_label(dev, vp, &clabel);
3127 }
3128 }
3129 return(new_value);
3130 }
3131
3132 void
3133 rf_release_all_vps(RF_ConfigSet_t *cset)
3134 {
3135 RF_AutoConfig_t *ac;
3136
3137 ac = cset->ac;
3138 while(ac!=NULL) {
3139 /* Close the vp, and give it back */
3140 if (ac->vp) {
3141 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3142 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3143 vput(ac->vp);
3144 ac->vp = NULL;
3145 }
3146 ac = ac->next;
3147 }
3148 }
3149
3150
3151 void
3152 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3153 {
3154 RF_AutoConfig_t *ac;
3155 RF_AutoConfig_t *next_ac;
3156
3157 ac = cset->ac;
3158 while(ac!=NULL) {
3159 next_ac = ac->next;
3160 /* nuke the label */
3161 free(ac->clabel, M_RAIDFRAME);
3162 /* cleanup the config structure */
3163 free(ac, M_RAIDFRAME);
3164 /* "next.." */
3165 ac = next_ac;
3166 }
3167 /* and, finally, nuke the config set */
3168 free(cset, M_RAIDFRAME);
3169 }
3170
3171
3172 void
3173 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3174 {
3175 /* current version number */
3176 clabel->version = RF_COMPONENT_LABEL_VERSION;
3177 clabel->serial_number = raidPtr->serial_number;
3178 clabel->mod_counter = raidPtr->mod_counter;
3179 clabel->num_rows = 1;
3180 clabel->num_columns = raidPtr->numCol;
3181 clabel->clean = RF_RAID_DIRTY; /* not clean */
3182 clabel->status = rf_ds_optimal; /* "It's good!" */
3183
3184 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3185 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3186 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3187
3188 clabel->blockSize = raidPtr->bytesPerSector;
3189 clabel->numBlocks = raidPtr->sectorsPerDisk;
3190
3191 /* XXX not portable */
3192 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3193 clabel->maxOutstanding = raidPtr->maxOutstanding;
3194 clabel->autoconfigure = raidPtr->autoconfigure;
3195 clabel->root_partition = raidPtr->root_partition;
3196 clabel->last_unit = raidPtr->raidid;
3197 clabel->config_order = raidPtr->config_order;
3198 }
3199
3200 int
3201 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3202 {
3203 RF_Raid_t *raidPtr;
3204 RF_Config_t *config;
3205 int raidID;
3206 int retcode;
3207
3208 #if DEBUG
3209 printf("RAID autoconfigure\n");
3210 #endif
3211
3212 retcode = 0;
3213 *unit = -1;
3214
3215 /* 1. Create a config structure */
3216
3217 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3218 M_RAIDFRAME,
3219 M_NOWAIT);
3220 if (config==NULL) {
3221 printf("Out of mem!?!?\n");
3222 /* XXX do something more intelligent here. */
3223 return(1);
3224 }
3225
3226 memset(config, 0, sizeof(RF_Config_t));
3227
3228 /*
3229 2. Figure out what RAID ID this one is supposed to live at
3230 See if we can get the same RAID dev that it was configured
3231 on last time..
3232 */
3233
3234 raidID = cset->ac->clabel->last_unit;
3235 if ((raidID < 0) || (raidID >= numraid)) {
3236 /* let's not wander off into lala land. */
3237 raidID = numraid - 1;
3238 }
3239 if (raidPtrs[raidID]->valid != 0) {
3240
3241 /*
3242 Nope... Go looking for an alternative...
3243 Start high so we don't immediately use raid0 if that's
3244 not taken.
3245 */
3246
3247 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3248 if (raidPtrs[raidID]->valid == 0) {
3249 /* can use this one! */
3250 break;
3251 }
3252 }
3253 }
3254
3255 if (raidID < 0) {
3256 /* punt... */
3257 printf("Unable to auto configure this set!\n");
3258 printf("(Out of RAID devs!)\n");
3259 return(1);
3260 }
3261
3262 #if DEBUG
3263 printf("Configuring raid%d:\n",raidID);
3264 #endif
3265
3266 raidPtr = raidPtrs[raidID];
3267
3268 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3269 raidPtr->raidid = raidID;
3270 raidPtr->openings = RAIDOUTSTANDING;
3271
3272 /* 3. Build the configuration structure */
3273 rf_create_configuration(cset->ac, config, raidPtr);
3274
3275 /* 4. Do the configuration */
3276 retcode = rf_Configure(raidPtr, config, cset->ac);
3277
3278 if (retcode == 0) {
3279
3280 raidinit(raidPtrs[raidID]);
3281
3282 rf_markalldirty(raidPtrs[raidID]);
3283 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3284 if (cset->ac->clabel->root_partition==1) {
3285 /* everything configured just fine. Make a note
3286 that this set is eligible to be root. */
3287 cset->rootable = 1;
3288 /* XXX do this here? */
3289 raidPtrs[raidID]->root_partition = 1;
3290 }
3291 }
3292
3293 /* 5. Cleanup */
3294 free(config, M_RAIDFRAME);
3295
3296 *unit = raidID;
3297 return(retcode);
3298 }
3299
3300 void
3301 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3302 {
3303 struct buf *bp;
3304
3305 bp = (struct buf *)desc->bp;
3306 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3307 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3308 }
3309
/*
 * Convenience wrapper: initialize a pool and configure its water
 * marks.  `min' items are pre-allocated (pool_prime) and retained
 * (pool_setlowat); `max' caps the number of idle items kept around
 * (pool_sethiwat).
 */
void
rf_pool_init(struct pool *p, size_t size, char *w_chan,
	     size_t min, size_t max)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	pool_sethiwat(p, max);
	pool_prime(p, min);
	pool_setlowat(p, min);
}
3319