rf_netbsdkintf.c revision 1.178.2.1.2.3 1 /* $NetBSD: rf_netbsdkintf.c,v 1.178.2.1.2.3 2005/04/06 12:17:58 tron Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.178.2.1.2.3 2005/04/06 12:17:58 tron Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/user.h>
169 #include <sys/reboot.h>
170
171 #include <dev/raidframe/raidframevar.h>
172 #include <dev/raidframe/raidframeio.h>
173 #include "raid.h"
174 #include "opt_raid_autoconfig.h"
175 #include "rf_raid.h"
176 #include "rf_copyback.h"
177 #include "rf_dag.h"
178 #include "rf_dagflags.h"
179 #include "rf_desc.h"
180 #include "rf_diskqueue.h"
181 #include "rf_etimer.h"
182 #include "rf_general.h"
183 #include "rf_kintf.h"
184 #include "rf_options.h"
185 #include "rf_driver.h"
186 #include "rf_parityscan.h"
187 #include "rf_threadstuff.h"
188
/*
 * Debug tracing.
 *
 * db1_printf() takes a fully parenthesized printf argument list, e.g.
 *
 *	db1_printf(("value=%d\n", v));
 *
 * With DEBUG defined the message is printed only while rf_kdebug_level
 * is greater than zero; without DEBUG the macro expands to an empty
 * statement.
 *
 * Both expansions are wrapped in do { } while (0) so that the macro is
 * always exactly one statement.  A bare "if (...) printf a" would capture
 * a following "else", and a bare "{ }" followed by ';' would terminate an
 * enclosing if before its "else".
 */
#ifdef DEBUG
int	rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
195
196 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
197
198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
199
200 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
201 * spare table */
202 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
203 * installation process */
204
205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
206
207 /* prototypes */
208 static void KernelWakeupFunc(struct buf * bp);
209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
210 dev_t dev, RF_SectorNum_t startSect,
211 RF_SectorCount_t numSect, caddr_t buf,
212 void (*cbFunc) (struct buf *), void *cbArg,
213 int logBytesPerSector, struct proc * b_proc);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217
218 dev_type_open(raidopen);
219 dev_type_close(raidclose);
220 dev_type_read(raidread);
221 dev_type_write(raidwrite);
222 dev_type_ioctl(raidioctl);
223 dev_type_strategy(raidstrategy);
224 dev_type_dump(raiddump);
225 dev_type_size(raidsize);
226
227 const struct bdevsw raid_bdevsw = {
228 raidopen, raidclose, raidstrategy, raidioctl,
229 raiddump, raidsize, D_DISK
230 };
231
232 const struct cdevsw raid_cdevsw = {
233 raidopen, raidclose, raidread, raidwrite, raidioctl,
234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
235 };
236
237 /*
238 * Pilfered from ccd.c
239 */
240
/*
 * Wrapper around the struct buf used for I/O to an individual component.
 * raidattach() sizes the component buffer pool (rf_pools.cbuf) with
 * sizeof(struct raidbuf).
 * NOTE(review): rf_buf presumably has to be the first member so that a
 * struct buf pointer can be converted back to its struct raidbuf -- confirm
 * against the completion path.
 */
241 struct raidbuf {
242 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
243 struct buf *rf_obp; /* ptr. to original I/O buf */
244 RF_DiskQueueData_t *req;/* the request that this was part of.. */
245 };
246
247 /* XXX Not sure if the following should be replacing the raidPtrs above,
248 or if it should be used in conjunction with that...
249 */
250
/*
 * Per-unit driver state.  raidattach() allocates and zeroes one of these
 * for each requested unit; the entry points index the raid_softc array
 * by the unit number extracted from the dev_t.
 */
251 struct raid_softc {
252 int sc_flags; /* flags */
253 int sc_cflags; /* configuration flags */
254 size_t sc_size; /* size of the raid device */
255 char sc_xname[20]; /* XXX external name */
256 struct disk sc_dkdev; /* generic disk device info */
257 struct bufq_state buf_queue; /* used for the device queue */
258 };
259 /* sc_flags */
260 #define RAIDF_INITED 0x01 /* unit has been initialized */
261 #define RAIDF_WLABEL 0x02 /* label area is writable */
262 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
263 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
264 #define RAIDF_LOCKED 0x80 /* unit is locked */
265
266 #define raidunit(x) DISKUNIT(x)
267 int numraid = 0;
268
269 /*
270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271 * Be aware that large numbers can allow the driver to consume a lot of
272 * kernel memory, especially on writes, and in degraded mode reads.
273 *
274 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275 * a single 64K write will typically require 64K for the old data,
276 * 64K for the old parity, and 64K for the new parity, for a total
277 * of 192K (if the parity buffer is not re-used immediately).
278 * Even if it is used immediately, that's still 128K, which when multiplied
279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
280 *
281 * Now in degraded mode, for example, a 64K read on the above setup may
282 * require data reconstruction, which will require *all* of the 4 remaining
283 * disks to participate -- 4 * 32K/disk == 128K again.
284 */
285
286 #ifndef RAIDOUTSTANDING
287 #define RAIDOUTSTANDING 6
288 #endif
289
290 #define RAIDLABELDEV(dev) \
291 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
292
293 /* declared here, and made public, for the benefit of KVM stuff.. */
294 struct raid_softc *raid_softc;
295
296 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
297 struct disklabel *);
298 static void raidgetdisklabel(dev_t);
299 static void raidmakedisklabel(struct raid_softc *);
300
301 static int raidlock(struct raid_softc *);
302 static void raidunlock(struct raid_softc *);
303
304 static void rf_markalldirty(RF_Raid_t *);
305
306 struct device *raidrootdev;
307
308 void rf_ReconThread(struct rf_recon_req *);
309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
310 void rf_CopybackThread(RF_Raid_t *raidPtr);
311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
312 int rf_autoconfig(struct device *self);
313 void rf_buildroothack(RF_ConfigSet_t *);
314
315 RF_AutoConfig_t *rf_find_raid_components(void);
316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
318 static int rf_reasonable_label(RF_ComponentLabel_t *);
319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
320 int rf_set_autoconfig(RF_Raid_t *, int);
321 int rf_set_rootpartition(RF_Raid_t *, int);
322 void rf_release_all_vps(RF_ConfigSet_t *);
323 void rf_cleanup_config_set(RF_ConfigSet_t *);
324 int rf_have_enough_components(RF_ConfigSet_t *);
325 int rf_auto_config_set(RF_ConfigSet_t *, int *);
326
327 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
328 allow autoconfig to take place.
329 Note that this is overridden by having
330 RAID_AUTOCONFIG as an option in the
331 kernel config file. */
332
333 struct RF_Pools_s rf_pools;
334
335 void
336 raidattach(int num)
337 {
338 int raidID;
339 int i, rc;
340
341 #ifdef DEBUG
342 printf("raidattach: Asked for %d units\n", num);
343 #endif
344
345 if (num <= 0) {
346 #ifdef DIAGNOSTIC
347 panic("raidattach: count <= 0");
348 #endif
349 return;
350 }
351 /* This is where all the initialization stuff gets done. */
352
353 numraid = num;
354
355 /* Make some space for requested number of units... */
356
357 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
358 if (raidPtrs == NULL) {
359 panic("raidPtrs is NULL!!");
360 }
361
362 /* Initialize the component buffer pool. */
363 rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf),
364 "raidpl", num * RAIDOUTSTANDING,
365 2 * num * RAIDOUTSTANDING);
366
367 rf_mutex_init(&rf_sparet_wait_mutex);
368
369 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
370
371 for (i = 0; i < num; i++)
372 raidPtrs[i] = NULL;
373 rc = rf_BootRaidframe();
374 if (rc == 0)
375 printf("Kernelized RAIDframe activated\n");
376 else
377 panic("Serious error booting RAID!!");
378
379 /* put together some datastructures like the CCD device does.. This
380 * lets us lock the device and what-not when it gets opened. */
381
382 raid_softc = (struct raid_softc *)
383 malloc(num * sizeof(struct raid_softc),
384 M_RAIDFRAME, M_NOWAIT);
385 if (raid_softc == NULL) {
386 printf("WARNING: no memory for RAIDframe driver\n");
387 return;
388 }
389
390 memset(raid_softc, 0, num * sizeof(struct raid_softc));
391
392 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
393 M_RAIDFRAME, M_NOWAIT);
394 if (raidrootdev == NULL) {
395 panic("No memory for RAIDframe driver!!?!?!");
396 }
397
398 for (raidID = 0; raidID < num; raidID++) {
399 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
400
401 raidrootdev[raidID].dv_class = DV_DISK;
402 raidrootdev[raidID].dv_cfdata = NULL;
403 raidrootdev[raidID].dv_unit = raidID;
404 raidrootdev[raidID].dv_parent = NULL;
405 raidrootdev[raidID].dv_flags = 0;
406 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
407
408 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
409 (RF_Raid_t *));
410 if (raidPtrs[raidID] == NULL) {
411 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
412 numraid = raidID;
413 return;
414 }
415 }
416
417 #ifdef RAID_AUTOCONFIG
418 raidautoconfig = 1;
419 #endif
420
421 /*
422 * Register a finalizer which will be used to auto-config RAID
423 * sets once all real hardware devices have been found.
424 */
425 if (config_finalize_register(NULL, rf_autoconfig) != 0)
426 printf("WARNING: unable to register RAIDframe finalizer\n");
427 }
428
429 int
430 rf_autoconfig(struct device *self)
431 {
432 RF_AutoConfig_t *ac_list;
433 RF_ConfigSet_t *config_sets;
434
435 if (raidautoconfig == 0)
436 return (0);
437
438 /* XXX This code can only be run once. */
439 raidautoconfig = 0;
440
441 /* 1. locate all RAID components on the system */
442 #ifdef DEBUG
443 printf("Searching for RAID components...\n");
444 #endif
445 ac_list = rf_find_raid_components();
446
447 /* 2. Sort them into their respective sets. */
448 config_sets = rf_create_auto_sets(ac_list);
449
450 /*
451 * 3. Evaluate each set andconfigure the valid ones.
452 * This gets done in rf_buildroothack().
453 */
454 rf_buildroothack(config_sets);
455
456 return (1);
457 }
458
459 void
460 rf_buildroothack(RF_ConfigSet_t *config_sets)
461 {
462 RF_ConfigSet_t *cset;
463 RF_ConfigSet_t *next_cset;
464 int retcode;
465 int raidID;
466 int rootID;
467 int num_root;
468
469 rootID = 0;
470 num_root = 0;
471 cset = config_sets;
472 while(cset != NULL ) {
473 next_cset = cset->next;
474 if (rf_have_enough_components(cset) &&
475 cset->ac->clabel->autoconfigure==1) {
476 retcode = rf_auto_config_set(cset,&raidID);
477 if (!retcode) {
478 if (cset->rootable) {
479 rootID = raidID;
480 num_root++;
481 }
482 } else {
483 /* The autoconfig didn't work :( */
484 #if DEBUG
485 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
486 #endif
487 rf_release_all_vps(cset);
488 }
489 } else {
490 /* we're not autoconfiguring this set...
491 release the associated resources */
492 rf_release_all_vps(cset);
493 }
494 /* cleanup */
495 rf_cleanup_config_set(cset);
496 cset = next_cset;
497 }
498
499 /* we found something bootable... */
500
501 if (num_root == 1) {
502 booted_device = &raidrootdev[rootID];
503 } else if (num_root > 1) {
504 /* we can't guess.. require the user to answer... */
505 boothowto |= RB_ASKNAME;
506 }
507 }
508
509
510 int
511 raidsize(dev_t dev)
512 {
513 struct raid_softc *rs;
514 struct disklabel *lp;
515 int part, unit, omask, size;
516
517 unit = raidunit(dev);
518 if (unit >= numraid)
519 return (-1);
520 rs = &raid_softc[unit];
521
522 if ((rs->sc_flags & RAIDF_INITED) == 0)
523 return (-1);
524
525 part = DISKPART(dev);
526 omask = rs->sc_dkdev.dk_openmask & (1 << part);
527 lp = rs->sc_dkdev.dk_label;
528
529 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
530 return (-1);
531
532 if (lp->d_partitions[part].p_fstype != FS_SWAP)
533 size = -1;
534 else
535 size = lp->d_partitions[part].p_size *
536 (lp->d_secsize / DEV_BSIZE);
537
538 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
539 return (-1);
540
541 return (size);
542
543 }
544
545 int
546 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
547 {
548 /* Not implemented. */
549 return ENXIO;
550 }
551 /* ARGSUSED */
552 int
553 raidopen(dev_t dev, int flags, int fmt, struct proc *p)
554 {
555 int unit = raidunit(dev);
556 struct raid_softc *rs;
557 struct disklabel *lp;
558 int part, pmask;
559 int error = 0;
560
561 if (unit >= numraid)
562 return (ENXIO);
563 rs = &raid_softc[unit];
564
565 if ((error = raidlock(rs)) != 0)
566 return (error);
567 lp = rs->sc_dkdev.dk_label;
568
569 part = DISKPART(dev);
570 pmask = (1 << part);
571
572 if ((rs->sc_flags & RAIDF_INITED) &&
573 (rs->sc_dkdev.dk_openmask == 0))
574 raidgetdisklabel(dev);
575
576 /* make sure that this partition exists */
577
578 if (part != RAW_PART) {
579 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
580 ((part >= lp->d_npartitions) ||
581 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
582 error = ENXIO;
583 raidunlock(rs);
584 return (error);
585 }
586 }
587 /* Prevent this unit from being unconfigured while open. */
588 switch (fmt) {
589 case S_IFCHR:
590 rs->sc_dkdev.dk_copenmask |= pmask;
591 break;
592
593 case S_IFBLK:
594 rs->sc_dkdev.dk_bopenmask |= pmask;
595 break;
596 }
597
598 if ((rs->sc_dkdev.dk_openmask == 0) &&
599 ((rs->sc_flags & RAIDF_INITED) != 0)) {
600 /* First one... mark things as dirty... Note that we *MUST*
601 have done a configure before this. I DO NOT WANT TO BE
602 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
603 THAT THEY BELONG TOGETHER!!!!! */
604 /* XXX should check to see if we're only open for reading
605 here... If so, we needn't do this, but then need some
606 other way of keeping track of what's happened.. */
607
608 rf_markalldirty( raidPtrs[unit] );
609 }
610
611
612 rs->sc_dkdev.dk_openmask =
613 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
614
615 raidunlock(rs);
616
617 return (error);
618
619
620 }
621 /* ARGSUSED */
622 int
623 raidclose(dev_t dev, int flags, int fmt, struct proc *p)
624 {
625 int unit = raidunit(dev);
626 struct raid_softc *rs;
627 int error = 0;
628 int part;
629
630 if (unit >= numraid)
631 return (ENXIO);
632 rs = &raid_softc[unit];
633
634 if ((error = raidlock(rs)) != 0)
635 return (error);
636
637 part = DISKPART(dev);
638
639 /* ...that much closer to allowing unconfiguration... */
640 switch (fmt) {
641 case S_IFCHR:
642 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
643 break;
644
645 case S_IFBLK:
646 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
647 break;
648 }
649 rs->sc_dkdev.dk_openmask =
650 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
651
652 if ((rs->sc_dkdev.dk_openmask == 0) &&
653 ((rs->sc_flags & RAIDF_INITED) != 0)) {
654 /* Last one... device is not unconfigured yet.
655 Device shutdown has taken care of setting the
656 clean bits if RAIDF_INITED is not set
657 mark things as clean... */
658
659 rf_update_component_labels(raidPtrs[unit],
660 RF_FINAL_COMPONENT_UPDATE);
661 if (doing_shutdown) {
662 /* last one, and we're going down, so
663 lights out for this RAID set too. */
664 error = rf_Shutdown(raidPtrs[unit]);
665
666 /* It's no longer initialized... */
667 rs->sc_flags &= ~RAIDF_INITED;
668
669 /* Detach the disk. */
670 disk_detach(&rs->sc_dkdev);
671 }
672 }
673
674 raidunlock(rs);
675 return (0);
676
677 }
678
679 void
680 raidstrategy(struct buf *bp)
681 {
682 int s;
683
684 unsigned int raidID = raidunit(bp->b_dev);
685 RF_Raid_t *raidPtr;
686 struct raid_softc *rs = &raid_softc[raidID];
687 int wlabel;
688
689 if ((rs->sc_flags & RAIDF_INITED) ==0) {
690 bp->b_error = ENXIO;
691 bp->b_flags |= B_ERROR;
692 bp->b_resid = bp->b_bcount;
693 biodone(bp);
694 return;
695 }
696 if (raidID >= numraid || !raidPtrs[raidID]) {
697 bp->b_error = ENODEV;
698 bp->b_flags |= B_ERROR;
699 bp->b_resid = bp->b_bcount;
700 biodone(bp);
701 return;
702 }
703 raidPtr = raidPtrs[raidID];
704 if (!raidPtr->valid) {
705 bp->b_error = ENODEV;
706 bp->b_flags |= B_ERROR;
707 bp->b_resid = bp->b_bcount;
708 biodone(bp);
709 return;
710 }
711 if (bp->b_bcount == 0) {
712 db1_printf(("b_bcount is zero..\n"));
713 biodone(bp);
714 return;
715 }
716
717 /*
718 * Do bounds checking and adjust transfer. If there's an
719 * error, the bounds check will flag that for us.
720 */
721
722 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
723 if (DISKPART(bp->b_dev) != RAW_PART)
724 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
725 db1_printf(("Bounds check failed!!:%d %d\n",
726 (int) bp->b_blkno, (int) wlabel));
727 biodone(bp);
728 return;
729 }
730 s = splbio();
731
732 bp->b_resid = 0;
733
734 /* stuff it onto our queue */
735 BUFQ_PUT(&rs->buf_queue, bp);
736
737 raidstart(raidPtrs[raidID]);
738
739 splx(s);
740 }
741 /* ARGSUSED */
742 int
743 raidread(dev_t dev, struct uio *uio, int flags)
744 {
745 int unit = raidunit(dev);
746 struct raid_softc *rs;
747
748 if (unit >= numraid)
749 return (ENXIO);
750 rs = &raid_softc[unit];
751
752 if ((rs->sc_flags & RAIDF_INITED) == 0)
753 return (ENXIO);
754
755 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
756
757 }
758 /* ARGSUSED */
759 int
760 raidwrite(dev_t dev, struct uio *uio, int flags)
761 {
762 int unit = raidunit(dev);
763 struct raid_softc *rs;
764
765 if (unit >= numraid)
766 return (ENXIO);
767 rs = &raid_softc[unit];
768
769 if ((rs->sc_flags & RAIDF_INITED) == 0)
770 return (ENXIO);
771
772 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
773
774 }
775
776 int
777 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
778 {
779 int unit = raidunit(dev);
780 int error = 0;
781 int part, pmask;
782 struct raid_softc *rs;
783 RF_Config_t *k_cfg, *u_cfg;
784 RF_Raid_t *raidPtr;
785 RF_RaidDisk_t *diskPtr;
786 RF_AccTotals_t *totals;
787 RF_DeviceConfig_t *d_cfg, **ucfgp;
788 u_char *specific_buf;
789 int retcode = 0;
790 int column;
791 int raidid;
792 struct rf_recon_req *rrcopy, *rr;
793 RF_ComponentLabel_t *clabel;
794 RF_ComponentLabel_t ci_label;
795 RF_ComponentLabel_t **clabel_ptr;
796 RF_SingleComponent_t *sparePtr,*componentPtr;
797 RF_SingleComponent_t hot_spare;
798 RF_SingleComponent_t component;
799 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
800 int i, j, d;
801 #ifdef __HAVE_OLD_DISKLABEL
802 struct disklabel newlabel;
803 #endif
804
805 if (unit >= numraid)
806 return (ENXIO);
807 rs = &raid_softc[unit];
808 raidPtr = raidPtrs[unit];
809
810 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
811 (int) DISKPART(dev), (int) unit, (int) cmd));
812
813 /* Must be open for writes for these commands... */
814 switch (cmd) {
815 case DIOCSDINFO:
816 case DIOCWDINFO:
817 #ifdef __HAVE_OLD_DISKLABEL
818 case ODIOCWDINFO:
819 case ODIOCSDINFO:
820 #endif
821 case DIOCWLABEL:
822 if ((flag & FWRITE) == 0)
823 return (EBADF);
824 }
825
826 /* Must be initialized for these... */
827 switch (cmd) {
828 case DIOCGDINFO:
829 case DIOCSDINFO:
830 case DIOCWDINFO:
831 #ifdef __HAVE_OLD_DISKLABEL
832 case ODIOCGDINFO:
833 case ODIOCWDINFO:
834 case ODIOCSDINFO:
835 case ODIOCGDEFLABEL:
836 #endif
837 case DIOCGPART:
838 case DIOCWLABEL:
839 case DIOCGDEFLABEL:
840 case RAIDFRAME_SHUTDOWN:
841 case RAIDFRAME_REWRITEPARITY:
842 case RAIDFRAME_GET_INFO:
843 case RAIDFRAME_RESET_ACCTOTALS:
844 case RAIDFRAME_GET_ACCTOTALS:
845 case RAIDFRAME_KEEP_ACCTOTALS:
846 case RAIDFRAME_GET_SIZE:
847 case RAIDFRAME_FAIL_DISK:
848 case RAIDFRAME_COPYBACK:
849 case RAIDFRAME_CHECK_RECON_STATUS:
850 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
851 case RAIDFRAME_GET_COMPONENT_LABEL:
852 case RAIDFRAME_SET_COMPONENT_LABEL:
853 case RAIDFRAME_ADD_HOT_SPARE:
854 case RAIDFRAME_REMOVE_HOT_SPARE:
855 case RAIDFRAME_INIT_LABELS:
856 case RAIDFRAME_REBUILD_IN_PLACE:
857 case RAIDFRAME_CHECK_PARITY:
858 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
860 case RAIDFRAME_CHECK_COPYBACK_STATUS:
861 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
862 case RAIDFRAME_SET_AUTOCONFIG:
863 case RAIDFRAME_SET_ROOT:
864 case RAIDFRAME_DELETE_COMPONENT:
865 case RAIDFRAME_INCORPORATE_HOT_SPARE:
866 if ((rs->sc_flags & RAIDF_INITED) == 0)
867 return (ENXIO);
868 }
869
870 switch (cmd) {
871
872 /* configure the system */
873 case RAIDFRAME_CONFIGURE:
874
875 if (raidPtr->valid) {
876 /* There is a valid RAID set running on this unit! */
877 printf("raid%d: Device already configured!\n",unit);
878 return(EINVAL);
879 }
880
881 /* copy-in the configuration information */
882 /* data points to a pointer to the configuration structure */
883
884 u_cfg = *((RF_Config_t **) data);
885 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
886 if (k_cfg == NULL) {
887 return (ENOMEM);
888 }
889 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
890 if (retcode) {
891 RF_Free(k_cfg, sizeof(RF_Config_t));
892 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
893 retcode));
894 return (retcode);
895 }
896 /* allocate a buffer for the layout-specific data, and copy it
897 * in */
898 if (k_cfg->layoutSpecificSize) {
899 if (k_cfg->layoutSpecificSize > 10000) {
900 /* sanity check */
901 RF_Free(k_cfg, sizeof(RF_Config_t));
902 return (EINVAL);
903 }
904 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
905 (u_char *));
906 if (specific_buf == NULL) {
907 RF_Free(k_cfg, sizeof(RF_Config_t));
908 return (ENOMEM);
909 }
910 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
911 k_cfg->layoutSpecificSize);
912 if (retcode) {
913 RF_Free(k_cfg, sizeof(RF_Config_t));
914 RF_Free(specific_buf,
915 k_cfg->layoutSpecificSize);
916 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
917 retcode));
918 return (retcode);
919 }
920 } else
921 specific_buf = NULL;
922 k_cfg->layoutSpecific = specific_buf;
923
924 /* should do some kind of sanity check on the configuration.
925 * Store the sum of all the bytes in the last byte? */
926
927 /* configure the system */
928
929 /*
930 * Clear the entire RAID descriptor, just to make sure
931 * there is no stale data left in the case of a
932 * reconfiguration
933 */
934 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
935 raidPtr->raidid = unit;
936
937 retcode = rf_Configure(raidPtr, k_cfg, NULL);
938
939 if (retcode == 0) {
940
941 /* allow this many simultaneous IO's to
942 this RAID device */
943 raidPtr->openings = RAIDOUTSTANDING;
944
945 raidinit(raidPtr);
946 rf_markalldirty(raidPtr);
947 }
948 /* free the buffers. No return code here. */
949 if (k_cfg->layoutSpecificSize) {
950 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
951 }
952 RF_Free(k_cfg, sizeof(RF_Config_t));
953
954 return (retcode);
955
956 /* shutdown the system */
957 case RAIDFRAME_SHUTDOWN:
958
959 if ((error = raidlock(rs)) != 0)
960 return (error);
961
962 /*
963 * If somebody has a partition mounted, we shouldn't
964 * shutdown.
965 */
966
967 part = DISKPART(dev);
968 pmask = (1 << part);
969 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
970 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
971 (rs->sc_dkdev.dk_copenmask & pmask))) {
972 raidunlock(rs);
973 return (EBUSY);
974 }
975
976 retcode = rf_Shutdown(raidPtr);
977
978 /* It's no longer initialized... */
979 rs->sc_flags &= ~RAIDF_INITED;
980
981 /* Detach the disk. */
982 disk_detach(&rs->sc_dkdev);
983
984 raidunlock(rs);
985
986 return (retcode);
987 case RAIDFRAME_GET_COMPONENT_LABEL:
988 clabel_ptr = (RF_ComponentLabel_t **) data;
989 /* need to read the component label for the disk indicated
990 by row,column in clabel */
991
992 /* For practice, let's get it directly fromdisk, rather
993 than from the in-core copy */
994 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
995 (RF_ComponentLabel_t *));
996 if (clabel == NULL)
997 return (ENOMEM);
998
999 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1000
1001 retcode = copyin( *clabel_ptr, clabel,
1002 sizeof(RF_ComponentLabel_t));
1003
1004 if (retcode) {
1005 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1006 return(retcode);
1007 }
1008
1009 clabel->row = 0; /* Don't allow looking at anything else.*/
1010
1011 column = clabel->column;
1012
1013 if ((column < 0) || (column >= raidPtr->numCol +
1014 raidPtr->numSpare)) {
1015 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1016 return(EINVAL);
1017 }
1018
1019 raidread_component_label(raidPtr->Disks[column].dev,
1020 raidPtr->raid_cinfo[column].ci_vp,
1021 clabel );
1022
1023 retcode = copyout(clabel, *clabel_ptr,
1024 sizeof(RF_ComponentLabel_t));
1025 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1026 return (retcode);
1027
1028 case RAIDFRAME_SET_COMPONENT_LABEL:
1029 clabel = (RF_ComponentLabel_t *) data;
1030
1031 /* XXX check the label for valid stuff... */
1032 /* Note that some things *should not* get modified --
1033 the user should be re-initing the labels instead of
1034 trying to patch things.
1035 */
1036
1037 raidid = raidPtr->raidid;
1038 #if DEBUG
1039 printf("raid%d: Got component label:\n", raidid);
1040 printf("raid%d: Version: %d\n", raidid, clabel->version);
1041 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1042 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1043 printf("raid%d: Column: %d\n", raidid, clabel->column);
1044 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1045 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1046 printf("raid%d: Status: %d\n", raidid, clabel->status);
1047 #endif
1048 clabel->row = 0;
1049 column = clabel->column;
1050
1051 if ((column < 0) || (column >= raidPtr->numCol)) {
1052 return(EINVAL);
1053 }
1054
1055 /* XXX this isn't allowed to do anything for now :-) */
1056
1057 /* XXX and before it is, we need to fill in the rest
1058 of the fields!?!?!?! */
1059 #if 0
1060 raidwrite_component_label(
1061 raidPtr->Disks[column].dev,
1062 raidPtr->raid_cinfo[column].ci_vp,
1063 clabel );
1064 #endif
1065 return (0);
1066
1067 case RAIDFRAME_INIT_LABELS:
1068 clabel = (RF_ComponentLabel_t *) data;
1069 /*
1070 we only want the serial number from
1071 the above. We get all the rest of the information
1072 from the config that was used to create this RAID
1073 set.
1074 */
1075
1076 raidPtr->serial_number = clabel->serial_number;
1077
1078 raid_init_component_label(raidPtr, &ci_label);
1079 ci_label.serial_number = clabel->serial_number;
1080 ci_label.row = 0; /* we dont' pretend to support more */
1081
1082 for(column=0;column<raidPtr->numCol;column++) {
1083 diskPtr = &raidPtr->Disks[column];
1084 if (!RF_DEAD_DISK(diskPtr->status)) {
1085 ci_label.partitionSize = diskPtr->partitionSize;
1086 ci_label.column = column;
1087 raidwrite_component_label(
1088 raidPtr->Disks[column].dev,
1089 raidPtr->raid_cinfo[column].ci_vp,
1090 &ci_label );
1091 }
1092 }
1093
1094 return (retcode);
1095 case RAIDFRAME_SET_AUTOCONFIG:
1096 d = rf_set_autoconfig(raidPtr, *(int *) data);
1097 printf("raid%d: New autoconfig value is: %d\n",
1098 raidPtr->raidid, d);
1099 *(int *) data = d;
1100 return (retcode);
1101
1102 case RAIDFRAME_SET_ROOT:
1103 d = rf_set_rootpartition(raidPtr, *(int *) data);
1104 printf("raid%d: New rootpartition value is: %d\n",
1105 raidPtr->raidid, d);
1106 *(int *) data = d;
1107 return (retcode);
1108
1109 /* initialize all parity */
1110 case RAIDFRAME_REWRITEPARITY:
1111
1112 if (raidPtr->Layout.map->faultsTolerated == 0) {
1113 /* Parity for RAID 0 is trivially correct */
1114 raidPtr->parity_good = RF_RAID_CLEAN;
1115 return(0);
1116 }
1117
1118 if (raidPtr->parity_rewrite_in_progress == 1) {
1119 /* Re-write is already in progress! */
1120 return(EINVAL);
1121 }
1122
1123 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1124 rf_RewriteParityThread,
1125 raidPtr,"raid_parity");
1126 return (retcode);
1127
1128
1129 case RAIDFRAME_ADD_HOT_SPARE:
1130 sparePtr = (RF_SingleComponent_t *) data;
1131 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1132 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1133 return(retcode);
1134
1135 case RAIDFRAME_REMOVE_HOT_SPARE:
1136 return(retcode);
1137
1138 case RAIDFRAME_DELETE_COMPONENT:
1139 componentPtr = (RF_SingleComponent_t *)data;
1140 memcpy( &component, componentPtr,
1141 sizeof(RF_SingleComponent_t));
1142 retcode = rf_delete_component(raidPtr, &component);
1143 return(retcode);
1144
1145 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1146 componentPtr = (RF_SingleComponent_t *)data;
1147 memcpy( &component, componentPtr,
1148 sizeof(RF_SingleComponent_t));
1149 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1150 return(retcode);
1151
1152 case RAIDFRAME_REBUILD_IN_PLACE:
1153
1154 if (raidPtr->Layout.map->faultsTolerated == 0) {
1155 /* Can't do this on a RAID 0!! */
1156 return(EINVAL);
1157 }
1158
1159 if (raidPtr->recon_in_progress == 1) {
1160 /* a reconstruct is already in progress! */
1161 return(EINVAL);
1162 }
1163
1164 componentPtr = (RF_SingleComponent_t *) data;
1165 memcpy( &component, componentPtr,
1166 sizeof(RF_SingleComponent_t));
1167 component.row = 0; /* we don't support any more */
1168 column = component.column;
1169
1170 if ((column < 0) || (column >= raidPtr->numCol)) {
1171 return(EINVAL);
1172 }
1173
1174 RF_LOCK_MUTEX(raidPtr->mutex);
1175 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1176 (raidPtr->numFailures > 0)) {
1177 /* XXX 0 above shouldn't be constant!!! */
1178 /* some component other than this has failed.
1179 Let's not make things worse than they already
1180 are... */
1181 printf("raid%d: Unable to reconstruct to disk at:\n",
1182 raidPtr->raidid);
1183 printf("raid%d: Col: %d Too many failures.\n",
1184 raidPtr->raidid, column);
1185 RF_UNLOCK_MUTEX(raidPtr->mutex);
1186 return (EINVAL);
1187 }
1188 if (raidPtr->Disks[column].status ==
1189 rf_ds_reconstructing) {
1190 printf("raid%d: Unable to reconstruct to disk at:\n",
1191 raidPtr->raidid);
1192 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1193
1194 RF_UNLOCK_MUTEX(raidPtr->mutex);
1195 return (EINVAL);
1196 }
1197 if (raidPtr->Disks[column].status == rf_ds_spared) {
1198 RF_UNLOCK_MUTEX(raidPtr->mutex);
1199 return (EINVAL);
1200 }
1201 RF_UNLOCK_MUTEX(raidPtr->mutex);
1202
1203 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1204 if (rrcopy == NULL)
1205 return(ENOMEM);
1206
1207 rrcopy->raidPtr = (void *) raidPtr;
1208 rrcopy->col = column;
1209
1210 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1211 rf_ReconstructInPlaceThread,
1212 rrcopy,"raid_reconip");
1213 return(retcode);
1214
1215 case RAIDFRAME_GET_INFO:
1216 if (!raidPtr->valid)
1217 return (ENODEV);
1218 ucfgp = (RF_DeviceConfig_t **) data;
1219 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1220 (RF_DeviceConfig_t *));
1221 if (d_cfg == NULL)
1222 return (ENOMEM);
1223 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1224 d_cfg->rows = 1; /* there is only 1 row now */
1225 d_cfg->cols = raidPtr->numCol;
1226 d_cfg->ndevs = raidPtr->numCol;
1227 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1228 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1229 return (ENOMEM);
1230 }
1231 d_cfg->nspares = raidPtr->numSpare;
1232 if (d_cfg->nspares >= RF_MAX_DISKS) {
1233 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1234 return (ENOMEM);
1235 }
1236 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1237 d = 0;
1238 for (j = 0; j < d_cfg->cols; j++) {
1239 d_cfg->devs[d] = raidPtr->Disks[j];
1240 d++;
1241 }
1242 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1243 d_cfg->spares[i] = raidPtr->Disks[j];
1244 }
1245 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1246 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1247
1248 return (retcode);
1249
1250 case RAIDFRAME_CHECK_PARITY:
1251 *(int *) data = raidPtr->parity_good;
1252 return (0);
1253
1254 case RAIDFRAME_RESET_ACCTOTALS:
1255 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1256 return (0);
1257
1258 case RAIDFRAME_GET_ACCTOTALS:
1259 totals = (RF_AccTotals_t *) data;
1260 *totals = raidPtr->acc_totals;
1261 return (0);
1262
1263 case RAIDFRAME_KEEP_ACCTOTALS:
1264 raidPtr->keep_acc_totals = *(int *)data;
1265 return (0);
1266
1267 case RAIDFRAME_GET_SIZE:
1268 *(int *) data = raidPtr->totalSectors;
1269 return (0);
1270
1271 /* fail a disk & optionally start reconstruction */
1272 case RAIDFRAME_FAIL_DISK:
1273
1274 if (raidPtr->Layout.map->faultsTolerated == 0) {
1275 /* Can't do this on a RAID 0!! */
1276 return(EINVAL);
1277 }
1278
1279 rr = (struct rf_recon_req *) data;
1280 rr->row = 0;
1281 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1282 return (EINVAL);
1283
1284
1285 RF_LOCK_MUTEX(raidPtr->mutex);
1286 if (raidPtr->status == rf_rs_reconstructing) {
1287 /* you can't fail a disk while we're reconstructing! */
1288 /* XXX wrong for RAID6 */
1289 RF_UNLOCK_MUTEX(raidPtr->mutex);
1290 return (EINVAL);
1291 }
1292 if ((raidPtr->Disks[rr->col].status ==
1293 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1294 /* some other component has failed. Let's not make
1295 things worse. XXX wrong for RAID6 */
1296 RF_UNLOCK_MUTEX(raidPtr->mutex);
1297 return (EINVAL);
1298 }
1299 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1300 /* Can't fail a spared disk! */
1301 RF_UNLOCK_MUTEX(raidPtr->mutex);
1302 return (EINVAL);
1303 }
1304 RF_UNLOCK_MUTEX(raidPtr->mutex);
1305
1306 /* make a copy of the recon request so that we don't rely on
1307 * the user's buffer */
1308 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1309 if (rrcopy == NULL)
1310 return(ENOMEM);
1311 memcpy(rrcopy, rr, sizeof(*rr));
1312 rrcopy->raidPtr = (void *) raidPtr;
1313
1314 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1315 rf_ReconThread,
1316 rrcopy,"raid_recon");
1317 return (0);
1318
1319 /* invoke a copyback operation after recon on whatever disk
1320 * needs it, if any */
1321 case RAIDFRAME_COPYBACK:
1322
1323 if (raidPtr->Layout.map->faultsTolerated == 0) {
1324 /* This makes no sense on a RAID 0!! */
1325 return(EINVAL);
1326 }
1327
1328 if (raidPtr->copyback_in_progress == 1) {
1329 /* Copyback is already in progress! */
1330 return(EINVAL);
1331 }
1332
1333 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1334 rf_CopybackThread,
1335 raidPtr,"raid_copyback");
1336 return (retcode);
1337
1338 /* return the percentage completion of reconstruction */
1339 case RAIDFRAME_CHECK_RECON_STATUS:
1340 if (raidPtr->Layout.map->faultsTolerated == 0) {
1341 /* This makes no sense on a RAID 0, so tell the
1342 user it's done. */
1343 *(int *) data = 100;
1344 return(0);
1345 }
1346 if (raidPtr->status != rf_rs_reconstructing)
1347 *(int *) data = 100;
1348 else {
1349 if (raidPtr->reconControl->numRUsTotal > 0) {
1350 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1351 } else {
1352 *(int *) data = 0;
1353 }
1354 }
1355 return (0);
1356 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1357 progressInfoPtr = (RF_ProgressInfo_t **) data;
1358 if (raidPtr->status != rf_rs_reconstructing) {
1359 progressInfo.remaining = 0;
1360 progressInfo.completed = 100;
1361 progressInfo.total = 100;
1362 } else {
1363 progressInfo.total =
1364 raidPtr->reconControl->numRUsTotal;
1365 progressInfo.completed =
1366 raidPtr->reconControl->numRUsComplete;
1367 progressInfo.remaining = progressInfo.total -
1368 progressInfo.completed;
1369 }
1370 retcode = copyout(&progressInfo, *progressInfoPtr,
1371 sizeof(RF_ProgressInfo_t));
1372 return (retcode);
1373
1374 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1375 if (raidPtr->Layout.map->faultsTolerated == 0) {
1376 /* This makes no sense on a RAID 0, so tell the
1377 user it's done. */
1378 *(int *) data = 100;
1379 return(0);
1380 }
1381 if (raidPtr->parity_rewrite_in_progress == 1) {
1382 *(int *) data = 100 *
1383 raidPtr->parity_rewrite_stripes_done /
1384 raidPtr->Layout.numStripe;
1385 } else {
1386 *(int *) data = 100;
1387 }
1388 return (0);
1389
1390 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1391 progressInfoPtr = (RF_ProgressInfo_t **) data;
1392 if (raidPtr->parity_rewrite_in_progress == 1) {
1393 progressInfo.total = raidPtr->Layout.numStripe;
1394 progressInfo.completed =
1395 raidPtr->parity_rewrite_stripes_done;
1396 progressInfo.remaining = progressInfo.total -
1397 progressInfo.completed;
1398 } else {
1399 progressInfo.remaining = 0;
1400 progressInfo.completed = 100;
1401 progressInfo.total = 100;
1402 }
1403 retcode = copyout(&progressInfo, *progressInfoPtr,
1404 sizeof(RF_ProgressInfo_t));
1405 return (retcode);
1406
1407 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1408 if (raidPtr->Layout.map->faultsTolerated == 0) {
1409 /* This makes no sense on a RAID 0 */
1410 *(int *) data = 100;
1411 return(0);
1412 }
1413 if (raidPtr->copyback_in_progress == 1) {
1414 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1415 raidPtr->Layout.numStripe;
1416 } else {
1417 *(int *) data = 100;
1418 }
1419 return (0);
1420
1421 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1422 progressInfoPtr = (RF_ProgressInfo_t **) data;
1423 if (raidPtr->copyback_in_progress == 1) {
1424 progressInfo.total = raidPtr->Layout.numStripe;
1425 progressInfo.completed =
1426 raidPtr->copyback_stripes_done;
1427 progressInfo.remaining = progressInfo.total -
1428 progressInfo.completed;
1429 } else {
1430 progressInfo.remaining = 0;
1431 progressInfo.completed = 100;
1432 progressInfo.total = 100;
1433 }
1434 retcode = copyout(&progressInfo, *progressInfoPtr,
1435 sizeof(RF_ProgressInfo_t));
1436 return (retcode);
1437
1438 /* the sparetable daemon calls this to wait for the kernel to
1439 * need a spare table. this ioctl does not return until a
1440 * spare table is needed. XXX -- calling mpsleep here in the
1441 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1442 * -- I should either compute the spare table in the kernel,
1443 * or have a different -- XXX XXX -- interface (a different
1444 * character device) for delivering the table -- XXX */
1445 #if 0
1446 case RAIDFRAME_SPARET_WAIT:
1447 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1448 while (!rf_sparet_wait_queue)
1449 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1450 waitreq = rf_sparet_wait_queue;
1451 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1452 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1453
1454 /* structure assignment */
1455 *((RF_SparetWait_t *) data) = *waitreq;
1456
1457 RF_Free(waitreq, sizeof(*waitreq));
1458 return (0);
1459
1460 /* wakes up a process waiting on SPARET_WAIT and puts an error
1461 * code in it that will cause the daemon to exit */
1462 case RAIDFRAME_ABORT_SPARET_WAIT:
1463 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1464 waitreq->fcol = -1;
1465 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1466 waitreq->next = rf_sparet_wait_queue;
1467 rf_sparet_wait_queue = waitreq;
1468 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1469 wakeup(&rf_sparet_wait_queue);
1470 return (0);
1471
1472 /* used by the spare table daemon to deliver a spare table
1473 * into the kernel */
1474 case RAIDFRAME_SEND_SPARET:
1475
1476 /* install the spare table */
1477 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1478
1479 /* respond to the requestor. the return status of the spare
1480 * table installation is passed in the "fcol" field */
1481 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1482 waitreq->fcol = retcode;
1483 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1484 waitreq->next = rf_sparet_resp_queue;
1485 rf_sparet_resp_queue = waitreq;
1486 wakeup(&rf_sparet_resp_queue);
1487 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1488
1489 return (retcode);
1490 #endif
1491
1492 default:
1493 break; /* fall through to the os-specific code below */
1494
1495 }
1496
1497 if (!raidPtr->valid)
1498 return (EINVAL);
1499
1500 /*
1501 * Add support for "regular" device ioctls here.
1502 */
1503
1504 switch (cmd) {
1505 case DIOCGDINFO:
1506 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1507 break;
1508 #ifdef __HAVE_OLD_DISKLABEL
1509 case ODIOCGDINFO:
1510 newlabel = *(rs->sc_dkdev.dk_label);
1511 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1512 return ENOTTY;
1513 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1514 break;
1515 #endif
1516
1517 case DIOCGPART:
1518 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1519 ((struct partinfo *) data)->part =
1520 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1521 break;
1522
1523 case DIOCWDINFO:
1524 case DIOCSDINFO:
1525 #ifdef __HAVE_OLD_DISKLABEL
1526 case ODIOCWDINFO:
1527 case ODIOCSDINFO:
1528 #endif
1529 {
1530 struct disklabel *lp;
1531 #ifdef __HAVE_OLD_DISKLABEL
1532 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1533 memset(&newlabel, 0, sizeof newlabel);
1534 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1535 lp = &newlabel;
1536 } else
1537 #endif
1538 lp = (struct disklabel *)data;
1539
1540 if ((error = raidlock(rs)) != 0)
1541 return (error);
1542
1543 rs->sc_flags |= RAIDF_LABELLING;
1544
1545 error = setdisklabel(rs->sc_dkdev.dk_label,
1546 lp, 0, rs->sc_dkdev.dk_cpulabel);
1547 if (error == 0) {
1548 if (cmd == DIOCWDINFO
1549 #ifdef __HAVE_OLD_DISKLABEL
1550 || cmd == ODIOCWDINFO
1551 #endif
1552 )
1553 error = writedisklabel(RAIDLABELDEV(dev),
1554 raidstrategy, rs->sc_dkdev.dk_label,
1555 rs->sc_dkdev.dk_cpulabel);
1556 }
1557 rs->sc_flags &= ~RAIDF_LABELLING;
1558
1559 raidunlock(rs);
1560
1561 if (error)
1562 return (error);
1563 break;
1564 }
1565
1566 case DIOCWLABEL:
1567 if (*(int *) data != 0)
1568 rs->sc_flags |= RAIDF_WLABEL;
1569 else
1570 rs->sc_flags &= ~RAIDF_WLABEL;
1571 break;
1572
1573 case DIOCGDEFLABEL:
1574 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1575 break;
1576
1577 #ifdef __HAVE_OLD_DISKLABEL
1578 case ODIOCGDEFLABEL:
1579 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1580 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1581 return ENOTTY;
1582 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1583 break;
1584 #endif
1585
1586 default:
1587 retcode = ENOTTY;
1588 }
1589 return (retcode);
1590
1591 }
1592
1593
1594 /* raidinit -- complete the rest of the initialization for the
1595 RAIDframe device. */
1596
1597
1598 static void
1599 raidinit(RF_Raid_t *raidPtr)
1600 {
1601 struct raid_softc *rs;
1602 int unit;
1603
1604 unit = raidPtr->raidid;
1605
1606 rs = &raid_softc[unit];
1607
1608 /* XXX should check return code first... */
1609 rs->sc_flags |= RAIDF_INITED;
1610
1611 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1612
1613 rs->sc_dkdev.dk_name = rs->sc_xname;
1614
1615 /* disk_attach actually creates space for the CPU disklabel, among
1616 * other things, so it's critical to call this *BEFORE* we try putzing
1617 * with disklabels. */
1618
1619 disk_attach(&rs->sc_dkdev);
1620
1621 /* XXX There may be a weird interaction here between this, and
1622 * protectedSectors, as used in RAIDframe. */
1623
1624 rs->sc_size = raidPtr->totalSectors;
1625 }
1626 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1627 /* wake up the daemon & tell it to get us a spare table
1628 * XXX
1629 * the entries in the queues should be tagged with the raidPtr
1630 * so that in the extremely rare case that two recons happen at once,
1631 * we know for which device were requesting a spare table
1632 * XXX
1633 *
1634 * XXX This code is not currently used. GO
1635 */
1636 int
1637 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1638 {
1639 int retcode;
1640
1641 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1642 req->next = rf_sparet_wait_queue;
1643 rf_sparet_wait_queue = req;
1644 wakeup(&rf_sparet_wait_queue);
1645
1646 /* mpsleep unlocks the mutex */
1647 while (!rf_sparet_resp_queue) {
1648 tsleep(&rf_sparet_resp_queue, PRIBIO,
1649 "raidframe getsparetable", 0);
1650 }
1651 req = rf_sparet_resp_queue;
1652 rf_sparet_resp_queue = req->next;
1653 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1654
1655 retcode = req->fcol;
1656 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1657 * alloc'd */
1658 return (retcode);
1659 }
1660 #endif
1661
1662 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1663 * bp & passes it down.
1664 * any calls originating in the kernel must use non-blocking I/O
1665 * do some extra sanity checking to return "appropriate" error values for
1666 * certain conditions (to make some standard utilities work)
1667 *
1668 * Formerly known as: rf_DoAccessKernel
1669 */
1670 void
1671 raidstart(RF_Raid_t *raidPtr)
1672 {
1673 RF_SectorCount_t num_blocks, pb, sum;
1674 RF_RaidAddr_t raid_addr;
1675 struct partition *pp;
1676 daddr_t blocknum;
1677 int unit;
1678 struct raid_softc *rs;
1679 int do_async;
1680 struct buf *bp;
1681 int rc;
1682
1683 unit = raidPtr->raidid;
1684 rs = &raid_softc[unit];
1685
1686 /* quick check to see if anything has died recently */
1687 RF_LOCK_MUTEX(raidPtr->mutex);
1688 if (raidPtr->numNewFailures > 0) {
1689 RF_UNLOCK_MUTEX(raidPtr->mutex);
1690 rf_update_component_labels(raidPtr,
1691 RF_NORMAL_COMPONENT_UPDATE);
1692 RF_LOCK_MUTEX(raidPtr->mutex);
1693 raidPtr->numNewFailures--;
1694 }
1695
1696 /* Check to see if we're at the limit... */
1697 while (raidPtr->openings > 0) {
1698 RF_UNLOCK_MUTEX(raidPtr->mutex);
1699
1700 /* get the next item, if any, from the queue */
1701 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1702 /* nothing more to do */
1703 return;
1704 }
1705
1706 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1707 * partition.. Need to make it absolute to the underlying
1708 * device.. */
1709
1710 blocknum = bp->b_blkno;
1711 if (DISKPART(bp->b_dev) != RAW_PART) {
1712 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1713 blocknum += pp->p_offset;
1714 }
1715
1716 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1717 (int) blocknum));
1718
1719 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1720 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1721
1722 /* *THIS* is where we adjust what block we're going to...
1723 * but DO NOT TOUCH bp->b_blkno!!! */
1724 raid_addr = blocknum;
1725
1726 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1727 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1728 sum = raid_addr + num_blocks + pb;
1729 if (1 || rf_debugKernelAccess) {
1730 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1731 (int) raid_addr, (int) sum, (int) num_blocks,
1732 (int) pb, (int) bp->b_resid));
1733 }
1734 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1735 || (sum < num_blocks) || (sum < pb)) {
1736 bp->b_error = ENOSPC;
1737 bp->b_flags |= B_ERROR;
1738 bp->b_resid = bp->b_bcount;
1739 biodone(bp);
1740 RF_LOCK_MUTEX(raidPtr->mutex);
1741 continue;
1742 }
1743 /*
1744 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1745 */
1746
1747 if (bp->b_bcount & raidPtr->sectorMask) {
1748 bp->b_error = EINVAL;
1749 bp->b_flags |= B_ERROR;
1750 bp->b_resid = bp->b_bcount;
1751 biodone(bp);
1752 RF_LOCK_MUTEX(raidPtr->mutex);
1753 continue;
1754
1755 }
1756 db1_printf(("Calling DoAccess..\n"));
1757
1758
1759 RF_LOCK_MUTEX(raidPtr->mutex);
1760 raidPtr->openings--;
1761 RF_UNLOCK_MUTEX(raidPtr->mutex);
1762
1763 /*
1764 * Everything is async.
1765 */
1766 do_async = 1;
1767
1768 disk_busy(&rs->sc_dkdev);
1769
1770 /* XXX we're still at splbio() here... do we *really*
1771 need to be? */
1772
1773 /* don't ever condition on bp->b_flags & B_WRITE.
1774 * always condition on B_READ instead */
1775
1776 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1777 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1778 do_async, raid_addr, num_blocks,
1779 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1780
1781 if (rc) {
1782 bp->b_error = rc;
1783 bp->b_flags |= B_ERROR;
1784 bp->b_resid = bp->b_bcount;
1785 biodone(bp);
1786 /* continue loop */
1787 }
1788
1789 RF_LOCK_MUTEX(raidPtr->mutex);
1790 }
1791 RF_UNLOCK_MUTEX(raidPtr->mutex);
1792 }
1793
1794
1795
1796
1797 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1798
1799 int
1800 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1801 {
1802 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1803 struct buf *bp;
1804 struct raidbuf *raidbp = NULL;
1805
1806 req->queue = queue;
1807
1808 #if DIAGNOSTIC
1809 if (queue->raidPtr->raidid >= numraid) {
1810 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1811 numraid);
1812 panic("Invalid Unit number in rf_DispatchKernelIO");
1813 }
1814 #endif
1815
1816 bp = req->bp;
1817 #if 1
1818 /* XXX when there is a physical disk failure, someone is passing us a
1819 * buffer that contains old stuff!! Attempt to deal with this problem
1820 * without taking a performance hit... (not sure where the real bug
1821 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1822
1823 if (bp->b_flags & B_ERROR) {
1824 bp->b_flags &= ~B_ERROR;
1825 }
1826 if (bp->b_error != 0) {
1827 bp->b_error = 0;
1828 }
1829 #endif
1830 raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT);
1831 if (raidbp == NULL) {
1832 bp->b_flags |= B_ERROR;
1833 bp->b_error = ENOMEM;
1834 return (ENOMEM);
1835 }
1836 BUF_INIT(&raidbp->rf_buf);
1837
1838 /*
1839 * context for raidiodone
1840 */
1841 raidbp->rf_obp = bp;
1842 raidbp->req = req;
1843
1844 BIO_COPYPRIO(&raidbp->rf_buf, bp);
1845
1846 switch (req->type) {
1847 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1848 /* XXX need to do something extra here.. */
1849 /* I'm leaving this in, as I've never actually seen it used,
1850 * and I'd like folks to report it... GO */
1851 printf(("WAKEUP CALLED\n"));
1852 queue->numOutstanding++;
1853
1854 /* XXX need to glue the original buffer into this?? */
1855
1856 KernelWakeupFunc(&raidbp->rf_buf);
1857 break;
1858
1859 case RF_IO_TYPE_READ:
1860 case RF_IO_TYPE_WRITE:
1861 #if RF_ACC_TRACE > 0
1862 if (req->tracerec) {
1863 RF_ETIMER_START(req->tracerec->timer);
1864 }
1865 #endif
1866 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1867 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1868 req->sectorOffset, req->numSector,
1869 req->buf, KernelWakeupFunc, (void *) req,
1870 queue->raidPtr->logBytesPerSector, req->b_proc);
1871
1872 if (rf_debugKernelAccess) {
1873 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1874 (long) bp->b_blkno));
1875 }
1876 queue->numOutstanding++;
1877 queue->last_deq_sector = req->sectorOffset;
1878 /* acc wouldn't have been let in if there were any pending
1879 * reqs at any other priority */
1880 queue->curPriority = req->priority;
1881
1882 db1_printf(("Going for %c to unit %d col %d\n",
1883 req->type, queue->raidPtr->raidid,
1884 queue->col));
1885 db1_printf(("sector %d count %d (%d bytes) %d\n",
1886 (int) req->sectorOffset, (int) req->numSector,
1887 (int) (req->numSector <<
1888 queue->raidPtr->logBytesPerSector),
1889 (int) queue->raidPtr->logBytesPerSector));
1890 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1891 raidbp->rf_buf.b_vp->v_numoutput++;
1892 }
1893 VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf);
1894
1895 break;
1896
1897 default:
1898 panic("bad req->type in rf_DispatchKernelIO");
1899 }
1900 db1_printf(("Exiting from DispatchKernelIO\n"));
1901
1902 return (0);
1903 }
1904 /* this is the callback function associated with a I/O invoked from
1905 kernel code.
1906 */
1907 static void
1908 KernelWakeupFunc(struct buf *vbp)
1909 {
1910 RF_DiskQueueData_t *req = NULL;
1911 RF_DiskQueue_t *queue;
1912 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1913 struct buf *bp;
1914 int s;
1915
1916 s = splbio();
1917 db1_printf(("recovering the request queue:\n"));
1918 req = raidbp->req;
1919
1920 bp = raidbp->rf_obp;
1921
1922 queue = (RF_DiskQueue_t *) req->queue;
1923
1924 if (raidbp->rf_buf.b_flags & B_ERROR) {
1925 bp->b_flags |= B_ERROR;
1926 bp->b_error = raidbp->rf_buf.b_error ?
1927 raidbp->rf_buf.b_error : EIO;
1928 }
1929
1930 /* XXX methinks this could be wrong... */
1931 #if 1
1932 bp->b_resid = raidbp->rf_buf.b_resid;
1933 #endif
1934 #if RF_ACC_TRACE > 0
1935 if (req->tracerec) {
1936 RF_ETIMER_STOP(req->tracerec->timer);
1937 RF_ETIMER_EVAL(req->tracerec->timer);
1938 RF_LOCK_MUTEX(rf_tracing_mutex);
1939 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1940 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1941 req->tracerec->num_phys_ios++;
1942 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1943 }
1944 #endif
1945 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1946
1947 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1948 * ballistic, and mark the component as hosed... */
1949
1950 if (bp->b_flags & B_ERROR) {
1951 /* Mark the disk as dead */
1952 /* but only mark it once... */
1953 /* and only if it wouldn't leave this RAID set
1954 completely broken */
1955 if ((queue->raidPtr->Disks[queue->col].status ==
1956 rf_ds_optimal) && (queue->raidPtr->numFailures <
1957 queue->raidPtr->Layout.map->faultsTolerated)) {
1958 printf("raid%d: IO Error. Marking %s as failed.\n",
1959 queue->raidPtr->raidid,
1960 queue->raidPtr->Disks[queue->col].devname);
1961 queue->raidPtr->Disks[queue->col].status =
1962 rf_ds_failed;
1963 queue->raidPtr->status = rf_rs_degraded;
1964 queue->raidPtr->numFailures++;
1965 queue->raidPtr->numNewFailures++;
1966 } else { /* Disk is already dead... */
1967 /* printf("Disk already marked as dead!\n"); */
1968 }
1969
1970 }
1971
1972 pool_put(&rf_pools.cbuf, raidbp);
1973
1974 /* Fill in the error value */
1975
1976 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
1977
1978 simple_lock(&queue->raidPtr->iodone_lock);
1979
1980 /* Drop this one on the "finished" queue... */
1981 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
1982
1983 /* Let the raidio thread know there is work to be done. */
1984 wakeup(&(queue->raidPtr->iodone));
1985
1986 simple_unlock(&queue->raidPtr->iodone_lock);
1987
1988 splx(s);
1989 }
1990
1991
1992
1993 /*
1994 * initialize a buf structure for doing an I/O in the kernel.
1995 */
1996 static void
1997 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1998 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf,
1999 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2000 struct proc *b_proc)
2001 {
2002 /* bp->b_flags = B_PHYS | rw_flag; */
2003 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2004 bp->b_bcount = numSect << logBytesPerSector;
2005 bp->b_bufsize = bp->b_bcount;
2006 bp->b_error = 0;
2007 bp->b_dev = dev;
2008 bp->b_data = buf;
2009 bp->b_blkno = startSect;
2010 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2011 if (bp->b_bcount == 0) {
2012 panic("bp->b_bcount is zero in InitBP!!");
2013 }
2014 bp->b_proc = b_proc;
2015 bp->b_iodone = cbFunc;
2016 bp->b_vp = b_vp;
2017
2018 }
2019
2020 static void
2021 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2022 struct disklabel *lp)
2023 {
2024 memset(lp, 0, sizeof(*lp));
2025
2026 /* fabricate a label... */
2027 lp->d_secperunit = raidPtr->totalSectors;
2028 lp->d_secsize = raidPtr->bytesPerSector;
2029 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2030 lp->d_ntracks = 4 * raidPtr->numCol;
2031 lp->d_ncylinders = raidPtr->totalSectors /
2032 (lp->d_nsectors * lp->d_ntracks);
2033 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2034
2035 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2036 lp->d_type = DTYPE_RAID;
2037 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2038 lp->d_rpm = 3600;
2039 lp->d_interleave = 1;
2040 lp->d_flags = 0;
2041
2042 lp->d_partitions[RAW_PART].p_offset = 0;
2043 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2044 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2045 lp->d_npartitions = RAW_PART + 1;
2046
2047 lp->d_magic = DISKMAGIC;
2048 lp->d_magic2 = DISKMAGIC;
2049 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2050
2051 }
2052 /*
2053 * Read the disklabel from the raid device. If one is not present, fake one
2054 * up.
2055 */
2056 static void
2057 raidgetdisklabel(dev_t dev)
2058 {
2059 int unit = raidunit(dev);
2060 struct raid_softc *rs = &raid_softc[unit];
2061 const char *errstring;
2062 struct disklabel *lp = rs->sc_dkdev.dk_label;
2063 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2064 RF_Raid_t *raidPtr;
2065
2066 db1_printf(("Getting the disklabel...\n"));
2067
2068 memset(clp, 0, sizeof(*clp));
2069
2070 raidPtr = raidPtrs[unit];
2071
2072 raidgetdefaultlabel(raidPtr, rs, lp);
2073
2074 /*
2075 * Call the generic disklabel extraction routine.
2076 */
2077 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2078 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2079 if (errstring)
2080 raidmakedisklabel(rs);
2081 else {
2082 int i;
2083 struct partition *pp;
2084
2085 /*
2086 * Sanity check whether the found disklabel is valid.
2087 *
2088 * This is necessary since total size of the raid device
2089 * may vary when an interleave is changed even though exactly
2090 * same componets are used, and old disklabel may used
2091 * if that is found.
2092 */
2093 if (lp->d_secperunit != rs->sc_size)
2094 printf("raid%d: WARNING: %s: "
2095 "total sector size in disklabel (%d) != "
2096 "the size of raid (%ld)\n", unit, rs->sc_xname,
2097 lp->d_secperunit, (long) rs->sc_size);
2098 for (i = 0; i < lp->d_npartitions; i++) {
2099 pp = &lp->d_partitions[i];
2100 if (pp->p_offset + pp->p_size > rs->sc_size)
2101 printf("raid%d: WARNING: %s: end of partition `%c' "
2102 "exceeds the size of raid (%ld)\n",
2103 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2104 }
2105 }
2106
2107 }
2108 /*
2109 * Take care of things one might want to take care of in the event
2110 * that a disklabel isn't present.
2111 */
2112 static void
2113 raidmakedisklabel(struct raid_softc *rs)
2114 {
2115 struct disklabel *lp = rs->sc_dkdev.dk_label;
2116 db1_printf(("Making a label..\n"));
2117
2118 /*
2119 * For historical reasons, if there's no disklabel present
2120 * the raw partition must be marked FS_BSDFFS.
2121 */
2122
2123 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2124
2125 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2126
2127 lp->d_checksum = dkcksum(lp);
2128 }
2129 /*
2130 * Lookup the provided name in the filesystem. If the file exists,
2131 * is a valid block device, and isn't being used by anyone else,
2132 * set *vpp to the file's vnode.
2133 * You'll find the original of this in ccd.c
2134 */
2135 int
2136 raidlookup(char *path, struct proc *p, struct vnode **vpp)
2137 {
2138 struct nameidata nd;
2139 struct vnode *vp;
2140 struct vattr va;
2141 int error;
2142
2143 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2144 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2145 return (error);
2146 }
2147 vp = nd.ni_vp;
2148 if (vp->v_usecount > 1) {
2149 VOP_UNLOCK(vp, 0);
2150 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2151 return (EBUSY);
2152 }
2153 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2154 VOP_UNLOCK(vp, 0);
2155 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2156 return (error);
2157 }
2158 /* XXX: eventually we should handle VREG, too. */
2159 if (va.va_type != VBLK) {
2160 VOP_UNLOCK(vp, 0);
2161 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2162 return (ENOTBLK);
2163 }
2164 VOP_UNLOCK(vp, 0);
2165 *vpp = vp;
2166 return (0);
2167 }
2168 /*
2169 * Wait interruptibly for an exclusive lock.
2170 *
2171 * XXX
2172 * Several drivers do this; it should be abstracted and made MP-safe.
2173 * (Hmm... where have we seen this warning before :-> GO )
2174 */
2175 static int
2176 raidlock(struct raid_softc *rs)
2177 {
2178 int error;
2179
2180 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2181 rs->sc_flags |= RAIDF_WANTED;
2182 if ((error =
2183 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2184 return (error);
2185 }
2186 rs->sc_flags |= RAIDF_LOCKED;
2187 return (0);
2188 }
2189 /*
2190 * Unlock and wake up any waiters.
2191 */
2192 static void
2193 raidunlock(struct raid_softc *rs)
2194 {
2195
2196 rs->sc_flags &= ~RAIDF_LOCKED;
2197 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2198 rs->sc_flags &= ~RAIDF_WANTED;
2199 wakeup(rs);
2200 }
2201 }
2202
2203
/*
 * Placement of the component label on a component: the label routines in
 * this file transfer RF_COMPONENT_INFO_SIZE bytes starting
 * RF_COMPONENT_INFO_OFFSET bytes into the device (the offset is converted
 * to a block number by dividing by DEV_BSIZE).
 */
#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2206
2207 int
2208 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2209 {
2210 RF_ComponentLabel_t clabel;
2211 raidread_component_label(dev, b_vp, &clabel);
2212 clabel.mod_counter = mod_counter;
2213 clabel.clean = RF_RAID_CLEAN;
2214 raidwrite_component_label(dev, b_vp, &clabel);
2215 return(0);
2216 }
2217
2218
2219 int
2220 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2221 {
2222 RF_ComponentLabel_t clabel;
2223 raidread_component_label(dev, b_vp, &clabel);
2224 clabel.mod_counter = mod_counter;
2225 clabel.clean = RF_RAID_DIRTY;
2226 raidwrite_component_label(dev, b_vp, &clabel);
2227 return(0);
2228 }
2229
2230 /* ARGSUSED */
2231 int
2232 raidread_component_label(dev_t dev, struct vnode *b_vp,
2233 RF_ComponentLabel_t *clabel)
2234 {
2235 struct buf *bp;
2236 const struct bdevsw *bdev;
2237 int error;
2238
2239 /* XXX should probably ensure that we don't try to do this if
2240 someone has changed rf_protected_sectors. */
2241
2242 if (b_vp == NULL) {
2243 /* For whatever reason, this component is not valid.
2244 Don't try to read a component label from it. */
2245 return(EINVAL);
2246 }
2247
2248 /* get a block of the appropriate size... */
2249 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2250 bp->b_dev = dev;
2251
2252 /* get our ducks in a row for the read */
2253 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2254 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2255 bp->b_flags |= B_READ;
2256 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2257
2258 bdev = bdevsw_lookup(bp->b_dev);
2259 if (bdev == NULL)
2260 return (ENXIO);
2261 (*bdev->d_strategy)(bp);
2262
2263 error = biowait(bp);
2264
2265 if (!error) {
2266 memcpy(clabel, bp->b_data,
2267 sizeof(RF_ComponentLabel_t));
2268 }
2269
2270 brelse(bp);
2271 return(error);
2272 }
2273 /* ARGSUSED */
2274 int
2275 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2276 RF_ComponentLabel_t *clabel)
2277 {
2278 struct buf *bp;
2279 const struct bdevsw *bdev;
2280 int error;
2281
2282 /* get a block of the appropriate size... */
2283 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2284 bp->b_dev = dev;
2285
2286 /* get our ducks in a row for the write */
2287 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2288 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2289 bp->b_flags |= B_WRITE;
2290 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2291
2292 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2293
2294 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2295
2296 bdev = bdevsw_lookup(bp->b_dev);
2297 if (bdev == NULL)
2298 return (ENXIO);
2299 (*bdev->d_strategy)(bp);
2300 error = biowait(bp);
2301 brelse(bp);
2302 if (error) {
2303 #if 1
2304 printf("Failed to write RAID component info!\n");
2305 #endif
2306 }
2307
2308 return(error);
2309 }
2310
2311 void
2312 rf_markalldirty(RF_Raid_t *raidPtr)
2313 {
2314 RF_ComponentLabel_t clabel;
2315 int sparecol;
2316 int c;
2317 int j;
2318 int scol = -1;
2319
2320 raidPtr->mod_counter++;
2321 for (c = 0; c < raidPtr->numCol; c++) {
2322 /* we don't want to touch (at all) a disk that has
2323 failed */
2324 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2325 raidread_component_label(
2326 raidPtr->Disks[c].dev,
2327 raidPtr->raid_cinfo[c].ci_vp,
2328 &clabel);
2329 if (clabel.status == rf_ds_spared) {
2330 /* XXX do something special...
2331 but whatever you do, don't
2332 try to access it!! */
2333 } else {
2334 raidmarkdirty(
2335 raidPtr->Disks[c].dev,
2336 raidPtr->raid_cinfo[c].ci_vp,
2337 raidPtr->mod_counter);
2338 }
2339 }
2340 }
2341
2342 for( c = 0; c < raidPtr->numSpare ; c++) {
2343 sparecol = raidPtr->numCol + c;
2344 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2345 /*
2346
2347 we claim this disk is "optimal" if it's
2348 rf_ds_used_spare, as that means it should be
2349 directly substitutable for the disk it replaced.
2350 We note that too...
2351
2352 */
2353
2354 for(j=0;j<raidPtr->numCol;j++) {
2355 if (raidPtr->Disks[j].spareCol == sparecol) {
2356 scol = j;
2357 break;
2358 }
2359 }
2360
2361 raidread_component_label(
2362 raidPtr->Disks[sparecol].dev,
2363 raidPtr->raid_cinfo[sparecol].ci_vp,
2364 &clabel);
2365 /* make sure status is noted */
2366
2367 raid_init_component_label(raidPtr, &clabel);
2368
2369 clabel.row = 0;
2370 clabel.column = scol;
2371 /* Note: we *don't* change status from rf_ds_used_spare
2372 to rf_ds_optimal */
2373 /* clabel.status = rf_ds_optimal; */
2374
2375 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2376 raidPtr->raid_cinfo[sparecol].ci_vp,
2377 raidPtr->mod_counter);
2378 }
2379 }
2380 }
2381
2382
2383 void
2384 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2385 {
2386 RF_ComponentLabel_t clabel;
2387 int sparecol;
2388 int c;
2389 int j;
2390 int scol;
2391
2392 scol = -1;
2393
2394 /* XXX should do extra checks to make sure things really are clean,
2395 rather than blindly setting the clean bit... */
2396
2397 raidPtr->mod_counter++;
2398
2399 for (c = 0; c < raidPtr->numCol; c++) {
2400 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2401 raidread_component_label(
2402 raidPtr->Disks[c].dev,
2403 raidPtr->raid_cinfo[c].ci_vp,
2404 &clabel);
2405 /* make sure status is noted */
2406 clabel.status = rf_ds_optimal;
2407 /* bump the counter */
2408 clabel.mod_counter = raidPtr->mod_counter;
2409
2410 raidwrite_component_label(
2411 raidPtr->Disks[c].dev,
2412 raidPtr->raid_cinfo[c].ci_vp,
2413 &clabel);
2414 if (final == RF_FINAL_COMPONENT_UPDATE) {
2415 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2416 raidmarkclean(
2417 raidPtr->Disks[c].dev,
2418 raidPtr->raid_cinfo[c].ci_vp,
2419 raidPtr->mod_counter);
2420 }
2421 }
2422 }
2423 /* else we don't touch it.. */
2424 }
2425
2426 for( c = 0; c < raidPtr->numSpare ; c++) {
2427 sparecol = raidPtr->numCol + c;
2428 /* Need to ensure that the reconstruct actually completed! */
2429 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2430 /*
2431
2432 we claim this disk is "optimal" if it's
2433 rf_ds_used_spare, as that means it should be
2434 directly substitutable for the disk it replaced.
2435 We note that too...
2436
2437 */
2438
2439 for(j=0;j<raidPtr->numCol;j++) {
2440 if (raidPtr->Disks[j].spareCol == sparecol) {
2441 scol = j;
2442 break;
2443 }
2444 }
2445
2446 /* XXX shouldn't *really* need this... */
2447 raidread_component_label(
2448 raidPtr->Disks[sparecol].dev,
2449 raidPtr->raid_cinfo[sparecol].ci_vp,
2450 &clabel);
2451 /* make sure status is noted */
2452
2453 raid_init_component_label(raidPtr, &clabel);
2454
2455 clabel.mod_counter = raidPtr->mod_counter;
2456 clabel.column = scol;
2457 clabel.status = rf_ds_optimal;
2458
2459 raidwrite_component_label(
2460 raidPtr->Disks[sparecol].dev,
2461 raidPtr->raid_cinfo[sparecol].ci_vp,
2462 &clabel);
2463 if (final == RF_FINAL_COMPONENT_UPDATE) {
2464 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2465 raidmarkclean( raidPtr->Disks[sparecol].dev,
2466 raidPtr->raid_cinfo[sparecol].ci_vp,
2467 raidPtr->mod_counter);
2468 }
2469 }
2470 }
2471 }
2472 }
2473
2474 void
2475 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2476 {
2477 struct proc *p;
2478
2479 p = raidPtr->engine_thread;
2480
2481 if (vp != NULL) {
2482 if (auto_configured == 1) {
2483 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2484 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2485 vput(vp);
2486
2487 } else {
2488 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2489 }
2490 }
2491 }
2492
2493
2494 void
2495 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2496 {
2497 int r,c;
2498 struct vnode *vp;
2499 int acd;
2500
2501
2502 /* We take this opportunity to close the vnodes like we should.. */
2503
2504 for (c = 0; c < raidPtr->numCol; c++) {
2505 vp = raidPtr->raid_cinfo[c].ci_vp;
2506 acd = raidPtr->Disks[c].auto_configured;
2507 rf_close_component(raidPtr, vp, acd);
2508 raidPtr->raid_cinfo[c].ci_vp = NULL;
2509 raidPtr->Disks[c].auto_configured = 0;
2510 }
2511
2512 for (r = 0; r < raidPtr->numSpare; r++) {
2513 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2514 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2515 rf_close_component(raidPtr, vp, acd);
2516 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2517 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2518 }
2519 }
2520
2521
2522 void
2523 rf_ReconThread(struct rf_recon_req *req)
2524 {
2525 int s;
2526 RF_Raid_t *raidPtr;
2527
2528 s = splbio();
2529 raidPtr = (RF_Raid_t *) req->raidPtr;
2530 raidPtr->recon_in_progress = 1;
2531
2532 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2533 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2534
2535 RF_Free(req, sizeof(*req));
2536
2537 raidPtr->recon_in_progress = 0;
2538 splx(s);
2539
2540 /* That's all... */
2541 kthread_exit(0); /* does not return */
2542 }
2543
2544 void
2545 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2546 {
2547 int retcode;
2548 int s;
2549
2550 raidPtr->parity_rewrite_stripes_done = 0;
2551 raidPtr->parity_rewrite_in_progress = 1;
2552 s = splbio();
2553 retcode = rf_RewriteParity(raidPtr);
2554 splx(s);
2555 if (retcode) {
2556 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2557 } else {
2558 /* set the clean bit! If we shutdown correctly,
2559 the clean bit on each component label will get
2560 set */
2561 raidPtr->parity_good = RF_RAID_CLEAN;
2562 }
2563 raidPtr->parity_rewrite_in_progress = 0;
2564
2565 /* Anyone waiting for us to stop? If so, inform them... */
2566 if (raidPtr->waitShutdown) {
2567 wakeup(&raidPtr->parity_rewrite_in_progress);
2568 }
2569
2570 /* That's all... */
2571 kthread_exit(0); /* does not return */
2572 }
2573
2574
2575 void
2576 rf_CopybackThread(RF_Raid_t *raidPtr)
2577 {
2578 int s;
2579
2580 raidPtr->copyback_in_progress = 1;
2581 s = splbio();
2582 rf_CopybackReconstructedData(raidPtr);
2583 splx(s);
2584 raidPtr->copyback_in_progress = 0;
2585
2586 /* That's all... */
2587 kthread_exit(0); /* does not return */
2588 }
2589
2590
2591 void
2592 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2593 {
2594 int s;
2595 RF_Raid_t *raidPtr;
2596
2597 s = splbio();
2598 raidPtr = req->raidPtr;
2599 raidPtr->recon_in_progress = 1;
2600 rf_ReconstructInPlace(raidPtr, req->col);
2601 RF_Free(req, sizeof(*req));
2602 raidPtr->recon_in_progress = 0;
2603 splx(s);
2604
2605 /* That's all... */
2606 kthread_exit(0); /* does not return */
2607 }
2608
2609 RF_AutoConfig_t *
2610 rf_find_raid_components()
2611 {
2612 struct vnode *vp;
2613 struct disklabel label;
2614 struct device *dv;
2615 dev_t dev;
2616 int bmajor;
2617 int error;
2618 int i;
2619 int good_one;
2620 RF_ComponentLabel_t *clabel;
2621 RF_AutoConfig_t *ac_list;
2622 RF_AutoConfig_t *ac;
2623
2624
2625 /* initialize the AutoConfig list */
2626 ac_list = NULL;
2627
2628 /* we begin by trolling through *all* the devices on the system */
2629
2630 for (dv = alldevs.tqh_first; dv != NULL;
2631 dv = dv->dv_list.tqe_next) {
2632
2633 /* we are only interested in disks... */
2634 if (dv->dv_class != DV_DISK)
2635 continue;
2636
2637 /* we don't care about floppies... */
2638 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2639 continue;
2640 }
2641
2642 /* we don't care about CD's... */
2643 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2644 continue;
2645 }
2646
2647 /* hdfd is the Atari/Hades floppy driver */
2648 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2649 continue;
2650 }
2651 /* fdisa is the Atari/Milan floppy driver */
2652 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2653 continue;
2654 }
2655
2656 /* need to find the device_name_to_block_device_major stuff */
2657 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2658
2659 /* get a vnode for the raw partition of this disk */
2660
2661 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2662 if (bdevvp(dev, &vp))
2663 panic("RAID can't alloc vnode");
2664
2665 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2666
2667 if (error) {
2668 /* "Who cares." Continue looking
2669 for something that exists*/
2670 vput(vp);
2671 continue;
2672 }
2673
2674 /* Ok, the disk exists. Go get the disklabel. */
2675 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2676 if (error) {
2677 /*
2678 * XXX can't happen - open() would
2679 * have errored out (or faked up one)
2680 */
2681 printf("can't get label for dev %s%c (%d)!?!?\n",
2682 dv->dv_xname, 'a' + RAW_PART, error);
2683 }
2684
2685 /* don't need this any more. We'll allocate it again
2686 a little later if we really do... */
2687 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2688 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2689 vput(vp);
2690
2691 for (i=0; i < label.d_npartitions; i++) {
2692 /* We only support partitions marked as RAID */
2693 if (label.d_partitions[i].p_fstype != FS_RAID)
2694 continue;
2695
2696 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2697 if (bdevvp(dev, &vp))
2698 panic("RAID can't alloc vnode");
2699
2700 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2701 if (error) {
2702 /* Whatever... */
2703 vput(vp);
2704 continue;
2705 }
2706
2707 good_one = 0;
2708
2709 clabel = (RF_ComponentLabel_t *)
2710 malloc(sizeof(RF_ComponentLabel_t),
2711 M_RAIDFRAME, M_NOWAIT);
2712 if (clabel == NULL) {
2713 /* XXX CLEANUP HERE */
2714 printf("RAID auto config: out of memory!\n");
2715 return(NULL); /* XXX probably should panic? */
2716 }
2717
2718 if (!raidread_component_label(dev, vp, clabel)) {
2719 /* Got the label. Does it look reasonable? */
2720 if (rf_reasonable_label(clabel) &&
2721 (clabel->partitionSize <=
2722 label.d_partitions[i].p_size)) {
2723 #if DEBUG
2724 printf("Component on: %s%c: %d\n",
2725 dv->dv_xname, 'a'+i,
2726 label.d_partitions[i].p_size);
2727 rf_print_component_label(clabel);
2728 #endif
2729 /* if it's reasonable, add it,
2730 else ignore it. */
2731 ac = (RF_AutoConfig_t *)
2732 malloc(sizeof(RF_AutoConfig_t),
2733 M_RAIDFRAME,
2734 M_NOWAIT);
2735 if (ac == NULL) {
2736 /* XXX should panic?? */
2737 return(NULL);
2738 }
2739
2740 sprintf(ac->devname, "%s%c",
2741 dv->dv_xname, 'a'+i);
2742 ac->dev = dev;
2743 ac->vp = vp;
2744 ac->clabel = clabel;
2745 ac->next = ac_list;
2746 ac_list = ac;
2747 good_one = 1;
2748 }
2749 }
2750 if (!good_one) {
2751 /* cleanup */
2752 free(clabel, M_RAIDFRAME);
2753 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2754 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2755 vput(vp);
2756 }
2757 }
2758 }
2759 return(ac_list);
2760 }
2761
2762 static int
2763 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2764 {
2765
2766 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2767 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2768 ((clabel->clean == RF_RAID_CLEAN) ||
2769 (clabel->clean == RF_RAID_DIRTY)) &&
2770 clabel->row >=0 &&
2771 clabel->column >= 0 &&
2772 clabel->num_rows > 0 &&
2773 clabel->num_columns > 0 &&
2774 clabel->row < clabel->num_rows &&
2775 clabel->column < clabel->num_columns &&
2776 clabel->blockSize > 0 &&
2777 clabel->numBlocks > 0) {
2778 /* label looks reasonable enough... */
2779 return(1);
2780 }
2781 return(0);
2782 }
2783
2784
#if DEBUG
/*
 * Dump the interesting fields of a component label to the console.
 * Only compiled in when DEBUG is set.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean;
	const char *is_autoconf;
	const char *is_root;

	is_clean = clabel->clean ? "Yes" : "No";
	is_autoconf = clabel->autoconfigure ? "Yes" : "No";
	is_root = clabel->root_partition ? "Yes" : "No";

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", is_clean, clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", is_autoconf );
	printf("   Contains root partition: %s\n", is_root );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2812
2813 RF_ConfigSet_t *
2814 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2815 {
2816 RF_AutoConfig_t *ac;
2817 RF_ConfigSet_t *config_sets;
2818 RF_ConfigSet_t *cset;
2819 RF_AutoConfig_t *ac_next;
2820
2821
2822 config_sets = NULL;
2823
2824 /* Go through the AutoConfig list, and figure out which components
2825 belong to what sets. */
2826 ac = ac_list;
2827 while(ac!=NULL) {
2828 /* we're going to putz with ac->next, so save it here
2829 for use at the end of the loop */
2830 ac_next = ac->next;
2831
2832 if (config_sets == NULL) {
2833 /* will need at least this one... */
2834 config_sets = (RF_ConfigSet_t *)
2835 malloc(sizeof(RF_ConfigSet_t),
2836 M_RAIDFRAME, M_NOWAIT);
2837 if (config_sets == NULL) {
2838 panic("rf_create_auto_sets: No memory!");
2839 }
2840 /* this one is easy :) */
2841 config_sets->ac = ac;
2842 config_sets->next = NULL;
2843 config_sets->rootable = 0;
2844 ac->next = NULL;
2845 } else {
2846 /* which set does this component fit into? */
2847 cset = config_sets;
2848 while(cset!=NULL) {
2849 if (rf_does_it_fit(cset, ac)) {
2850 /* looks like it matches... */
2851 ac->next = cset->ac;
2852 cset->ac = ac;
2853 break;
2854 }
2855 cset = cset->next;
2856 }
2857 if (cset==NULL) {
2858 /* didn't find a match above... new set..*/
2859 cset = (RF_ConfigSet_t *)
2860 malloc(sizeof(RF_ConfigSet_t),
2861 M_RAIDFRAME, M_NOWAIT);
2862 if (cset == NULL) {
2863 panic("rf_create_auto_sets: No memory!");
2864 }
2865 cset->ac = ac;
2866 ac->next = NULL;
2867 cset->next = config_sets;
2868 cset->rootable = 0;
2869 config_sets = cset;
2870 }
2871 }
2872 ac = ac_next;
2873 }
2874
2875
2876 return(config_sets);
2877 }
2878
2879 static int
2880 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2881 {
2882 RF_ComponentLabel_t *clabel1, *clabel2;
2883
2884 /* If this one matches the *first* one in the set, that's good
2885 enough, since the other members of the set would have been
2886 through here too... */
2887 /* note that we are not checking partitionSize here..
2888
2889 Note that we are also not checking the mod_counters here.
2890 If everything else matches execpt the mod_counter, that's
2891 good enough for this test. We will deal with the mod_counters
2892 a little later in the autoconfiguration process.
2893
2894 (clabel1->mod_counter == clabel2->mod_counter) &&
2895
2896 The reason we don't check for this is that failed disks
2897 will have lower modification counts. If those disks are
2898 not added to the set they used to belong to, then they will
2899 form their own set, which may result in 2 different sets,
2900 for example, competing to be configured at raid0, and
2901 perhaps competing to be the root filesystem set. If the
2902 wrong ones get configured, or both attempt to become /,
2903 weird behaviour and or serious lossage will occur. Thus we
2904 need to bring them into the fold here, and kick them out at
2905 a later point.
2906
2907 */
2908
2909 clabel1 = cset->ac->clabel;
2910 clabel2 = ac->clabel;
2911 if ((clabel1->version == clabel2->version) &&
2912 (clabel1->serial_number == clabel2->serial_number) &&
2913 (clabel1->num_rows == clabel2->num_rows) &&
2914 (clabel1->num_columns == clabel2->num_columns) &&
2915 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2916 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2917 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2918 (clabel1->parityConfig == clabel2->parityConfig) &&
2919 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2920 (clabel1->blockSize == clabel2->blockSize) &&
2921 (clabel1->numBlocks == clabel2->numBlocks) &&
2922 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2923 (clabel1->root_partition == clabel2->root_partition) &&
2924 (clabel1->last_unit == clabel2->last_unit) &&
2925 (clabel1->config_order == clabel2->config_order)) {
2926 /* if it get's here, it almost *has* to be a match */
2927 } else {
2928 /* it's not consistent with somebody in the set..
2929 punt */
2930 return(0);
2931 }
2932 /* all was fine.. it must fit... */
2933 return(1);
2934 }
2935
2936 int
2937 rf_have_enough_components(RF_ConfigSet_t *cset)
2938 {
2939 RF_AutoConfig_t *ac;
2940 RF_AutoConfig_t *auto_config;
2941 RF_ComponentLabel_t *clabel;
2942 int c;
2943 int num_cols;
2944 int num_missing;
2945 int mod_counter;
2946 int mod_counter_found;
2947 int even_pair_failed;
2948 char parity_type;
2949
2950
2951 /* check to see that we have enough 'live' components
2952 of this set. If so, we can configure it if necessary */
2953
2954 num_cols = cset->ac->clabel->num_columns;
2955 parity_type = cset->ac->clabel->parityConfig;
2956
2957 /* XXX Check for duplicate components!?!?!? */
2958
2959 /* Determine what the mod_counter is supposed to be for this set. */
2960
2961 mod_counter_found = 0;
2962 mod_counter = 0;
2963 ac = cset->ac;
2964 while(ac!=NULL) {
2965 if (mod_counter_found==0) {
2966 mod_counter = ac->clabel->mod_counter;
2967 mod_counter_found = 1;
2968 } else {
2969 if (ac->clabel->mod_counter > mod_counter) {
2970 mod_counter = ac->clabel->mod_counter;
2971 }
2972 }
2973 ac = ac->next;
2974 }
2975
2976 num_missing = 0;
2977 auto_config = cset->ac;
2978
2979 even_pair_failed = 0;
2980 for(c=0; c<num_cols; c++) {
2981 ac = auto_config;
2982 while(ac!=NULL) {
2983 if ((ac->clabel->column == c) &&
2984 (ac->clabel->mod_counter == mod_counter)) {
2985 /* it's this one... */
2986 #if DEBUG
2987 printf("Found: %s at %d\n",
2988 ac->devname,c);
2989 #endif
2990 break;
2991 }
2992 ac=ac->next;
2993 }
2994 if (ac==NULL) {
2995 /* Didn't find one here! */
2996 /* special case for RAID 1, especially
2997 where there are more than 2
2998 components (where RAIDframe treats
2999 things a little differently :( ) */
3000 if (parity_type == '1') {
3001 if (c%2 == 0) { /* even component */
3002 even_pair_failed = 1;
3003 } else { /* odd component. If
3004 we're failed, and
3005 so is the even
3006 component, it's
3007 "Good Night, Charlie" */
3008 if (even_pair_failed == 1) {
3009 return(0);
3010 }
3011 }
3012 } else {
3013 /* normal accounting */
3014 num_missing++;
3015 }
3016 }
3017 if ((parity_type == '1') && (c%2 == 1)) {
3018 /* Just did an even component, and we didn't
3019 bail.. reset the even_pair_failed flag,
3020 and go on to the next component.... */
3021 even_pair_failed = 0;
3022 }
3023 }
3024
3025 clabel = cset->ac->clabel;
3026
3027 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3028 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3029 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3030 /* XXX this needs to be made *much* more general */
3031 /* Too many failures */
3032 return(0);
3033 }
3034 /* otherwise, all is well, and we've got enough to take a kick
3035 at autoconfiguring this set */
3036 return(1);
3037 }
3038
3039 void
3040 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3041 RF_Raid_t *raidPtr)
3042 {
3043 RF_ComponentLabel_t *clabel;
3044 int i;
3045
3046 clabel = ac->clabel;
3047
3048 /* 1. Fill in the common stuff */
3049 config->numRow = clabel->num_rows = 1;
3050 config->numCol = clabel->num_columns;
3051 config->numSpare = 0; /* XXX should this be set here? */
3052 config->sectPerSU = clabel->sectPerSU;
3053 config->SUsPerPU = clabel->SUsPerPU;
3054 config->SUsPerRU = clabel->SUsPerRU;
3055 config->parityConfig = clabel->parityConfig;
3056 /* XXX... */
3057 strcpy(config->diskQueueType,"fifo");
3058 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3059 config->layoutSpecificSize = 0; /* XXX ?? */
3060
3061 while(ac!=NULL) {
3062 /* row/col values will be in range due to the checks
3063 in reasonable_label() */
3064 strcpy(config->devnames[0][ac->clabel->column],
3065 ac->devname);
3066 ac = ac->next;
3067 }
3068
3069 for(i=0;i<RF_MAXDBGV;i++) {
3070 config->debugVars[i][0] = 0;
3071 }
3072 }
3073
3074 int
3075 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3076 {
3077 RF_ComponentLabel_t clabel;
3078 struct vnode *vp;
3079 dev_t dev;
3080 int column;
3081 int sparecol;
3082
3083 raidPtr->autoconfigure = new_value;
3084
3085 for(column=0; column<raidPtr->numCol; column++) {
3086 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3087 dev = raidPtr->Disks[column].dev;
3088 vp = raidPtr->raid_cinfo[column].ci_vp;
3089 raidread_component_label(dev, vp, &clabel);
3090 clabel.autoconfigure = new_value;
3091 raidwrite_component_label(dev, vp, &clabel);
3092 }
3093 }
3094 for(column = 0; column < raidPtr->numSpare ; column++) {
3095 sparecol = raidPtr->numCol + column;
3096 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3097 dev = raidPtr->Disks[sparecol].dev;
3098 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3099 raidread_component_label(dev, vp, &clabel);
3100 clabel.autoconfigure = new_value;
3101 raidwrite_component_label(dev, vp, &clabel);
3102 }
3103 }
3104 return(new_value);
3105 }
3106
3107 int
3108 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3109 {
3110 RF_ComponentLabel_t clabel;
3111 struct vnode *vp;
3112 dev_t dev;
3113 int column;
3114 int sparecol;
3115
3116 raidPtr->root_partition = new_value;
3117 for(column=0; column<raidPtr->numCol; column++) {
3118 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3119 dev = raidPtr->Disks[column].dev;
3120 vp = raidPtr->raid_cinfo[column].ci_vp;
3121 raidread_component_label(dev, vp, &clabel);
3122 clabel.root_partition = new_value;
3123 raidwrite_component_label(dev, vp, &clabel);
3124 }
3125 }
3126 for(column = 0; column < raidPtr->numSpare ; column++) {
3127 sparecol = raidPtr->numCol + column;
3128 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3129 dev = raidPtr->Disks[sparecol].dev;
3130 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3131 raidread_component_label(dev, vp, &clabel);
3132 clabel.root_partition = new_value;
3133 raidwrite_component_label(dev, vp, &clabel);
3134 }
3135 }
3136 return(new_value);
3137 }
3138
3139 void
3140 rf_release_all_vps(RF_ConfigSet_t *cset)
3141 {
3142 RF_AutoConfig_t *ac;
3143
3144 ac = cset->ac;
3145 while(ac!=NULL) {
3146 /* Close the vp, and give it back */
3147 if (ac->vp) {
3148 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3149 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3150 vput(ac->vp);
3151 ac->vp = NULL;
3152 }
3153 ac = ac->next;
3154 }
3155 }
3156
3157
3158 void
3159 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3160 {
3161 RF_AutoConfig_t *ac;
3162 RF_AutoConfig_t *next_ac;
3163
3164 ac = cset->ac;
3165 while(ac!=NULL) {
3166 next_ac = ac->next;
3167 /* nuke the label */
3168 free(ac->clabel, M_RAIDFRAME);
3169 /* cleanup the config structure */
3170 free(ac, M_RAIDFRAME);
3171 /* "next.." */
3172 ac = next_ac;
3173 }
3174 /* and, finally, nuke the config set */
3175 free(cset, M_RAIDFRAME);
3176 }
3177
3178
3179 void
3180 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3181 {
3182 /* current version number */
3183 clabel->version = RF_COMPONENT_LABEL_VERSION;
3184 clabel->serial_number = raidPtr->serial_number;
3185 clabel->mod_counter = raidPtr->mod_counter;
3186 clabel->num_rows = 1;
3187 clabel->num_columns = raidPtr->numCol;
3188 clabel->clean = RF_RAID_DIRTY; /* not clean */
3189 clabel->status = rf_ds_optimal; /* "It's good!" */
3190
3191 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3192 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3193 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3194
3195 clabel->blockSize = raidPtr->bytesPerSector;
3196 clabel->numBlocks = raidPtr->sectorsPerDisk;
3197
3198 /* XXX not portable */
3199 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3200 clabel->maxOutstanding = raidPtr->maxOutstanding;
3201 clabel->autoconfigure = raidPtr->autoconfigure;
3202 clabel->root_partition = raidPtr->root_partition;
3203 clabel->last_unit = raidPtr->raidid;
3204 clabel->config_order = raidPtr->config_order;
3205 }
3206
3207 int
3208 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3209 {
3210 RF_Raid_t *raidPtr;
3211 RF_Config_t *config;
3212 int raidID;
3213 int retcode;
3214
3215 #if DEBUG
3216 printf("RAID autoconfigure\n");
3217 #endif
3218
3219 retcode = 0;
3220 *unit = -1;
3221
3222 /* 1. Create a config structure */
3223
3224 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3225 M_RAIDFRAME,
3226 M_NOWAIT);
3227 if (config==NULL) {
3228 printf("Out of mem!?!?\n");
3229 /* XXX do something more intelligent here. */
3230 return(1);
3231 }
3232
3233 memset(config, 0, sizeof(RF_Config_t));
3234
3235 /*
3236 2. Figure out what RAID ID this one is supposed to live at
3237 See if we can get the same RAID dev that it was configured
3238 on last time..
3239 */
3240
3241 raidID = cset->ac->clabel->last_unit;
3242 if ((raidID < 0) || (raidID >= numraid)) {
3243 /* let's not wander off into lala land. */
3244 raidID = numraid - 1;
3245 }
3246 if (raidPtrs[raidID]->valid != 0) {
3247
3248 /*
3249 Nope... Go looking for an alternative...
3250 Start high so we don't immediately use raid0 if that's
3251 not taken.
3252 */
3253
3254 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3255 if (raidPtrs[raidID]->valid == 0) {
3256 /* can use this one! */
3257 break;
3258 }
3259 }
3260 }
3261
3262 if (raidID < 0) {
3263 /* punt... */
3264 printf("Unable to auto configure this set!\n");
3265 printf("(Out of RAID devs!)\n");
3266 return(1);
3267 }
3268
3269 #if DEBUG
3270 printf("Configuring raid%d:\n",raidID);
3271 #endif
3272
3273 raidPtr = raidPtrs[raidID];
3274
3275 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3276 raidPtr->raidid = raidID;
3277 raidPtr->openings = RAIDOUTSTANDING;
3278
3279 /* 3. Build the configuration structure */
3280 rf_create_configuration(cset->ac, config, raidPtr);
3281
3282 /* 4. Do the configuration */
3283 retcode = rf_Configure(raidPtr, config, cset->ac);
3284
3285 if (retcode == 0) {
3286
3287 raidinit(raidPtrs[raidID]);
3288
3289 rf_markalldirty(raidPtrs[raidID]);
3290 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3291 if (cset->ac->clabel->root_partition==1) {
3292 /* everything configured just fine. Make a note
3293 that this set is eligible to be root. */
3294 cset->rootable = 1;
3295 /* XXX do this here? */
3296 raidPtrs[raidID]->root_partition = 1;
3297 }
3298 }
3299
3300 /* 5. Cleanup */
3301 free(config, M_RAIDFRAME);
3302
3303 *unit = raidID;
3304 return(retcode);
3305 }
3306
3307 void
3308 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3309 {
3310 struct buf *bp;
3311
3312 bp = (struct buf *)desc->bp;
3313 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3314 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3315 }
3316
3317 void
3318 rf_pool_init(struct pool *p, size_t size, char *w_chan,
3319 size_t min, size_t max)
3320 {
3321 pool_init(p, size, 0, 0, 0, w_chan, NULL);
3322 pool_sethiwat(p, max);
3323 pool_prime(p, min);
3324 pool_setlowat(p, min);
3325 }
3326