/*	$NetBSD: rf_netbsdkintf.c,v 1.207 2006/04/12 23:33:39 simonb Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.207 2006/04/12 23:33:39 simonb Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171
172 #include <dev/raidframe/raidframevar.h>
173 #include <dev/raidframe/raidframeio.h>
174 #include "raid.h"
175 #include "opt_raid_autoconfig.h"
176 #include "rf_raid.h"
177 #include "rf_copyback.h"
178 #include "rf_dag.h"
179 #include "rf_dagflags.h"
180 #include "rf_desc.h"
181 #include "rf_diskqueue.h"
182 #include "rf_etimer.h"
183 #include "rf_general.h"
184 #include "rf_kintf.h"
185 #include "rf_options.h"
186 #include "rf_driver.h"
187 #include "rf_parityscan.h"
188 #include "rf_threadstuff.h"
189
/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf() argument list.
 * With DEBUG defined the message is printed only while rf_kdebug_level
 * is greater than zero; without DEBUG the macro expands to nothing.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves
 * as a single statement: it needs a trailing semicolon and can be used
 * as the body of an if that has an else branch.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
196
197 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
198
199 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
200
201 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
202 * spare table */
203 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
204 * installation process */
205
206 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
207
208 /* prototypes */
209 static void KernelWakeupFunc(struct buf *);
210 static void InitBP(struct buf *, struct vnode *, unsigned,
211 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
212 void *, int, struct proc *);
213 static void raidinit(RF_Raid_t *);
214
215 void raidattach(int);
216
217 dev_type_open(raidopen);
218 dev_type_close(raidclose);
219 dev_type_read(raidread);
220 dev_type_write(raidwrite);
221 dev_type_ioctl(raidioctl);
222 dev_type_strategy(raidstrategy);
223 dev_type_dump(raiddump);
224 dev_type_size(raidsize);
225
226 const struct bdevsw raid_bdevsw = {
227 raidopen, raidclose, raidstrategy, raidioctl,
228 raiddump, raidsize, D_DISK
229 };
230
231 const struct cdevsw raid_cdevsw = {
232 raidopen, raidclose, raidread, raidwrite, raidioctl,
233 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
234 };
235
236 /* XXX Not sure if the following should be replacing the raidPtrs above,
237 or if it should be used in conjunction with that...
238 */
239
240 struct raid_softc {
241 int sc_flags; /* flags */
242 int sc_cflags; /* configuration flags */
243 size_t sc_size; /* size of the raid device */
244 char sc_xname[20]; /* XXX external name */
245 struct disk sc_dkdev; /* generic disk device info */
246 struct bufq_state *buf_queue; /* used for the device queue */
247 };
248 /* sc_flags */
249 #define RAIDF_INITED 0x01 /* unit has been initialized */
250 #define RAIDF_WLABEL 0x02 /* label area is writable */
251 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
252 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
253 #define RAIDF_LOCKED 0x80 /* unit is locked */
254
255 #define raidunit(x) DISKUNIT(x)
256 int numraid = 0;
257
extern struct cfdriver raid_cd;

/*
 * RAIDOUTSTANDING is the number of simultaneous IO's allowed to a RAID
 * device.  Be aware that large values can let the driver consume a lot
 * of kernel memory, especially on writes and on degraded-mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is re-used immediately, that's still 128K, which when
 * multiplied by say 10 requests, is 1280K, *on top* of the 640K of
 * incoming data.
 *
 * In degraded mode, a 64K read on the above setup may require data
 * reconstruction, which needs *all* of the 4 remaining disks to
 * participate -- 4 * 32K/disk == 128K again.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING	6
#endif

/* dev_t of the raw partition on the same major/unit as `dev'. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* Declared here, and made public, for the benefit of KVM stuff. */
struct raid_softc *raid_softc;
286
287 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
288 struct disklabel *);
289 static void raidgetdisklabel(dev_t);
290 static void raidmakedisklabel(struct raid_softc *);
291
292 static int raidlock(struct raid_softc *);
293 static void raidunlock(struct raid_softc *);
294
295 static void rf_markalldirty(RF_Raid_t *);
296
297 struct device *raidrootdev;
298
299 void rf_ReconThread(struct rf_recon_req *);
300 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
301 void rf_CopybackThread(RF_Raid_t *raidPtr);
302 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
303 int rf_autoconfig(struct device *self);
304 void rf_buildroothack(RF_ConfigSet_t *);
305
306 RF_AutoConfig_t *rf_find_raid_components(void);
307 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
308 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
309 static int rf_reasonable_label(RF_ComponentLabel_t *);
310 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
311 int rf_set_autoconfig(RF_Raid_t *, int);
312 int rf_set_rootpartition(RF_Raid_t *, int);
313 void rf_release_all_vps(RF_ConfigSet_t *);
314 void rf_cleanup_config_set(RF_ConfigSet_t *);
315 int rf_have_enough_components(RF_ConfigSet_t *);
316 int rf_auto_config_set(RF_ConfigSet_t *, int *);
317
318 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
319 allow autoconfig to take place.
320 Note that this is overridden by having
321 RAID_AUTOCONFIG as an option in the
322 kernel config file. */
323
324 struct RF_Pools_s rf_pools;
325
326 void
327 raidattach(int num)
328 {
329 int raidID;
330 int i, rc;
331
332 #ifdef DEBUG
333 printf("raidattach: Asked for %d units\n", num);
334 #endif
335
336 if (num <= 0) {
337 #ifdef DIAGNOSTIC
338 panic("raidattach: count <= 0");
339 #endif
340 return;
341 }
342 /* This is where all the initialization stuff gets done. */
343
344 numraid = num;
345
346 /* Make some space for requested number of units... */
347
348 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
349 if (raidPtrs == NULL) {
350 panic("raidPtrs is NULL!!");
351 }
352
353 rf_mutex_init(&rf_sparet_wait_mutex);
354
355 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
356
357 for (i = 0; i < num; i++)
358 raidPtrs[i] = NULL;
359 rc = rf_BootRaidframe();
360 if (rc == 0)
361 printf("Kernelized RAIDframe activated\n");
362 else
363 panic("Serious error booting RAID!!");
364
365 /* put together some datastructures like the CCD device does.. This
366 * lets us lock the device and what-not when it gets opened. */
367
368 raid_softc = (struct raid_softc *)
369 malloc(num * sizeof(struct raid_softc),
370 M_RAIDFRAME, M_NOWAIT);
371 if (raid_softc == NULL) {
372 printf("WARNING: no memory for RAIDframe driver\n");
373 return;
374 }
375
376 memset(raid_softc, 0, num * sizeof(struct raid_softc));
377
378 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
379 M_RAIDFRAME, M_NOWAIT);
380 if (raidrootdev == NULL) {
381 panic("No memory for RAIDframe driver!!?!?!");
382 }
383
384 for (raidID = 0; raidID < num; raidID++) {
385 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
386 pseudo_disk_init(&raid_softc[raidID].sc_dkdev);
387
388 /* XXXJRT Should use config_attach_pseudo() */
389
390 raidrootdev[raidID].dv_class = DV_DISK;
391 raidrootdev[raidID].dv_cfdata = NULL;
392 raidrootdev[raidID].dv_unit = raidID;
393 raidrootdev[raidID].dv_parent = NULL;
394 raidrootdev[raidID].dv_flags = 0;
395 raidrootdev[raidID].dv_cfdriver = &raid_cd;
396 snprintf(raidrootdev[raidID].dv_xname,
397 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);
398
399 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
400 (RF_Raid_t *));
401 if (raidPtrs[raidID] == NULL) {
402 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
403 numraid = raidID;
404 return;
405 }
406 }
407
408 #ifdef RAID_AUTOCONFIG
409 raidautoconfig = 1;
410 #endif
411
412 /*
413 * Register a finalizer which will be used to auto-config RAID
414 * sets once all real hardware devices have been found.
415 */
416 if (config_finalize_register(NULL, rf_autoconfig) != 0)
417 printf("WARNING: unable to register RAIDframe finalizer\n");
418 }
419
420 int
421 rf_autoconfig(struct device *self)
422 {
423 RF_AutoConfig_t *ac_list;
424 RF_ConfigSet_t *config_sets;
425
426 if (raidautoconfig == 0)
427 return (0);
428
429 /* XXX This code can only be run once. */
430 raidautoconfig = 0;
431
432 /* 1. locate all RAID components on the system */
433 #ifdef DEBUG
434 printf("Searching for RAID components...\n");
435 #endif
436 ac_list = rf_find_raid_components();
437
438 /* 2. Sort them into their respective sets. */
439 config_sets = rf_create_auto_sets(ac_list);
440
441 /*
442 * 3. Evaluate each set andconfigure the valid ones.
443 * This gets done in rf_buildroothack().
444 */
445 rf_buildroothack(config_sets);
446
447 return (1);
448 }
449
450 void
451 rf_buildroothack(RF_ConfigSet_t *config_sets)
452 {
453 RF_ConfigSet_t *cset;
454 RF_ConfigSet_t *next_cset;
455 int retcode;
456 int raidID;
457 int rootID;
458 int num_root;
459
460 rootID = 0;
461 num_root = 0;
462 cset = config_sets;
463 while(cset != NULL ) {
464 next_cset = cset->next;
465 if (rf_have_enough_components(cset) &&
466 cset->ac->clabel->autoconfigure==1) {
467 retcode = rf_auto_config_set(cset,&raidID);
468 if (!retcode) {
469 if (cset->rootable) {
470 rootID = raidID;
471 num_root++;
472 }
473 } else {
474 /* The autoconfig didn't work :( */
475 #if DEBUG
476 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
477 #endif
478 rf_release_all_vps(cset);
479 }
480 } else {
481 /* we're not autoconfiguring this set...
482 release the associated resources */
483 rf_release_all_vps(cset);
484 }
485 /* cleanup */
486 rf_cleanup_config_set(cset);
487 cset = next_cset;
488 }
489
490 /* we found something bootable... */
491
492 if (num_root == 1) {
493 booted_device = &raidrootdev[rootID];
494 } else if (num_root > 1) {
495 /* we can't guess.. require the user to answer... */
496 boothowto |= RB_ASKNAME;
497 }
498 }
499
500
501 int
502 raidsize(dev_t dev)
503 {
504 struct raid_softc *rs;
505 struct disklabel *lp;
506 int part, unit, omask, size;
507
508 unit = raidunit(dev);
509 if (unit >= numraid)
510 return (-1);
511 rs = &raid_softc[unit];
512
513 if ((rs->sc_flags & RAIDF_INITED) == 0)
514 return (-1);
515
516 part = DISKPART(dev);
517 omask = rs->sc_dkdev.dk_openmask & (1 << part);
518 lp = rs->sc_dkdev.dk_label;
519
520 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
521 return (-1);
522
523 if (lp->d_partitions[part].p_fstype != FS_SWAP)
524 size = -1;
525 else
526 size = lp->d_partitions[part].p_size *
527 (lp->d_secsize / DEV_BSIZE);
528
529 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
530 return (-1);
531
532 return (size);
533
534 }
535
536 int
537 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
538 {
539 /* Not implemented. */
540 return ENXIO;
541 }
542 /* ARGSUSED */
543 int
544 raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
545 {
546 int unit = raidunit(dev);
547 struct raid_softc *rs;
548 struct disklabel *lp;
549 int part, pmask;
550 int error = 0;
551
552 if (unit >= numraid)
553 return (ENXIO);
554 rs = &raid_softc[unit];
555
556 if ((error = raidlock(rs)) != 0)
557 return (error);
558 lp = rs->sc_dkdev.dk_label;
559
560 part = DISKPART(dev);
561 pmask = (1 << part);
562
563 if ((rs->sc_flags & RAIDF_INITED) &&
564 (rs->sc_dkdev.dk_openmask == 0))
565 raidgetdisklabel(dev);
566
567 /* make sure that this partition exists */
568
569 if (part != RAW_PART) {
570 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
571 ((part >= lp->d_npartitions) ||
572 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
573 error = ENXIO;
574 raidunlock(rs);
575 return (error);
576 }
577 }
578 /* Prevent this unit from being unconfigured while open. */
579 switch (fmt) {
580 case S_IFCHR:
581 rs->sc_dkdev.dk_copenmask |= pmask;
582 break;
583
584 case S_IFBLK:
585 rs->sc_dkdev.dk_bopenmask |= pmask;
586 break;
587 }
588
589 if ((rs->sc_dkdev.dk_openmask == 0) &&
590 ((rs->sc_flags & RAIDF_INITED) != 0)) {
591 /* First one... mark things as dirty... Note that we *MUST*
592 have done a configure before this. I DO NOT WANT TO BE
593 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
594 THAT THEY BELONG TOGETHER!!!!! */
595 /* XXX should check to see if we're only open for reading
596 here... If so, we needn't do this, but then need some
597 other way of keeping track of what's happened.. */
598
599 rf_markalldirty( raidPtrs[unit] );
600 }
601
602
603 rs->sc_dkdev.dk_openmask =
604 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
605
606 raidunlock(rs);
607
608 return (error);
609
610
611 }
612 /* ARGSUSED */
613 int
614 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
615 {
616 int unit = raidunit(dev);
617 struct raid_softc *rs;
618 int error = 0;
619 int part;
620
621 if (unit >= numraid)
622 return (ENXIO);
623 rs = &raid_softc[unit];
624
625 if ((error = raidlock(rs)) != 0)
626 return (error);
627
628 part = DISKPART(dev);
629
630 /* ...that much closer to allowing unconfiguration... */
631 switch (fmt) {
632 case S_IFCHR:
633 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
634 break;
635
636 case S_IFBLK:
637 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
638 break;
639 }
640 rs->sc_dkdev.dk_openmask =
641 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
642
643 if ((rs->sc_dkdev.dk_openmask == 0) &&
644 ((rs->sc_flags & RAIDF_INITED) != 0)) {
645 /* Last one... device is not unconfigured yet.
646 Device shutdown has taken care of setting the
647 clean bits if RAIDF_INITED is not set
648 mark things as clean... */
649
650 rf_update_component_labels(raidPtrs[unit],
651 RF_FINAL_COMPONENT_UPDATE);
652 if (doing_shutdown) {
653 /* last one, and we're going down, so
654 lights out for this RAID set too. */
655 error = rf_Shutdown(raidPtrs[unit]);
656
657 /* It's no longer initialized... */
658 rs->sc_flags &= ~RAIDF_INITED;
659
660 /* Detach the disk. */
661 pseudo_disk_detach(&rs->sc_dkdev);
662 }
663 }
664
665 raidunlock(rs);
666 return (0);
667
668 }
669
670 void
671 raidstrategy(struct buf *bp)
672 {
673 int s;
674
675 unsigned int raidID = raidunit(bp->b_dev);
676 RF_Raid_t *raidPtr;
677 struct raid_softc *rs = &raid_softc[raidID];
678 int wlabel;
679
680 if ((rs->sc_flags & RAIDF_INITED) ==0) {
681 bp->b_error = ENXIO;
682 bp->b_flags |= B_ERROR;
683 goto done;
684 }
685 if (raidID >= numraid || !raidPtrs[raidID]) {
686 bp->b_error = ENODEV;
687 bp->b_flags |= B_ERROR;
688 goto done;
689 }
690 raidPtr = raidPtrs[raidID];
691 if (!raidPtr->valid) {
692 bp->b_error = ENODEV;
693 bp->b_flags |= B_ERROR;
694 goto done;
695 }
696 if (bp->b_bcount == 0) {
697 db1_printf(("b_bcount is zero..\n"));
698 goto done;
699 }
700
701 /*
702 * Do bounds checking and adjust transfer. If there's an
703 * error, the bounds check will flag that for us.
704 */
705
706 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
707 if (DISKPART(bp->b_dev) == RAW_PART) {
708 uint64_t size; /* device size in DEV_BSIZE unit */
709
710 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
711 size = raidPtr->totalSectors <<
712 (raidPtr->logBytesPerSector - DEV_BSHIFT);
713 } else {
714 size = raidPtr->totalSectors >>
715 (DEV_BSHIFT - raidPtr->logBytesPerSector);
716 }
717 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
718 goto done;
719 }
720 } else {
721 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
722 db1_printf(("Bounds check failed!!:%d %d\n",
723 (int) bp->b_blkno, (int) wlabel));
724 goto done;
725 }
726 }
727 s = splbio();
728
729 bp->b_resid = 0;
730
731 /* stuff it onto our queue */
732 BUFQ_PUT(rs->buf_queue, bp);
733
734 /* scheduled the IO to happen at the next convenient time */
735 wakeup(&(raidPtrs[raidID]->iodone));
736
737 splx(s);
738 return;
739
740 done:
741 bp->b_resid = bp->b_bcount;
742 biodone(bp);
743 }
744 /* ARGSUSED */
745 int
746 raidread(dev_t dev, struct uio *uio, int flags)
747 {
748 int unit = raidunit(dev);
749 struct raid_softc *rs;
750
751 if (unit >= numraid)
752 return (ENXIO);
753 rs = &raid_softc[unit];
754
755 if ((rs->sc_flags & RAIDF_INITED) == 0)
756 return (ENXIO);
757
758 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
759
760 }
761 /* ARGSUSED */
762 int
763 raidwrite(dev_t dev, struct uio *uio, int flags)
764 {
765 int unit = raidunit(dev);
766 struct raid_softc *rs;
767
768 if (unit >= numraid)
769 return (ENXIO);
770 rs = &raid_softc[unit];
771
772 if ((rs->sc_flags & RAIDF_INITED) == 0)
773 return (ENXIO);
774
775 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
776
777 }
778
779 int
780 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
781 {
782 int unit = raidunit(dev);
783 int error = 0;
784 int part, pmask;
785 struct raid_softc *rs;
786 RF_Config_t *k_cfg, *u_cfg;
787 RF_Raid_t *raidPtr;
788 RF_RaidDisk_t *diskPtr;
789 RF_AccTotals_t *totals;
790 RF_DeviceConfig_t *d_cfg, **ucfgp;
791 u_char *specific_buf;
792 int retcode = 0;
793 int column;
794 int raidid;
795 struct rf_recon_req *rrcopy, *rr;
796 RF_ComponentLabel_t *clabel;
797 RF_ComponentLabel_t ci_label;
798 RF_ComponentLabel_t **clabel_ptr;
799 RF_SingleComponent_t *sparePtr,*componentPtr;
800 RF_SingleComponent_t hot_spare;
801 RF_SingleComponent_t component;
802 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
803 int i, j, d;
804 #ifdef __HAVE_OLD_DISKLABEL
805 struct disklabel newlabel;
806 #endif
807
808 if (unit >= numraid)
809 return (ENXIO);
810 rs = &raid_softc[unit];
811 raidPtr = raidPtrs[unit];
812
813 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
814 (int) DISKPART(dev), (int) unit, (int) cmd));
815
816 /* Must be open for writes for these commands... */
817 switch (cmd) {
818 case DIOCSDINFO:
819 case DIOCWDINFO:
820 #ifdef __HAVE_OLD_DISKLABEL
821 case ODIOCWDINFO:
822 case ODIOCSDINFO:
823 #endif
824 case DIOCWLABEL:
825 if ((flag & FWRITE) == 0)
826 return (EBADF);
827 }
828
829 /* Must be initialized for these... */
830 switch (cmd) {
831 case DIOCGDINFO:
832 case DIOCSDINFO:
833 case DIOCWDINFO:
834 #ifdef __HAVE_OLD_DISKLABEL
835 case ODIOCGDINFO:
836 case ODIOCWDINFO:
837 case ODIOCSDINFO:
838 case ODIOCGDEFLABEL:
839 #endif
840 case DIOCGPART:
841 case DIOCWLABEL:
842 case DIOCGDEFLABEL:
843 case RAIDFRAME_SHUTDOWN:
844 case RAIDFRAME_REWRITEPARITY:
845 case RAIDFRAME_GET_INFO:
846 case RAIDFRAME_RESET_ACCTOTALS:
847 case RAIDFRAME_GET_ACCTOTALS:
848 case RAIDFRAME_KEEP_ACCTOTALS:
849 case RAIDFRAME_GET_SIZE:
850 case RAIDFRAME_FAIL_DISK:
851 case RAIDFRAME_COPYBACK:
852 case RAIDFRAME_CHECK_RECON_STATUS:
853 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
854 case RAIDFRAME_GET_COMPONENT_LABEL:
855 case RAIDFRAME_SET_COMPONENT_LABEL:
856 case RAIDFRAME_ADD_HOT_SPARE:
857 case RAIDFRAME_REMOVE_HOT_SPARE:
858 case RAIDFRAME_INIT_LABELS:
859 case RAIDFRAME_REBUILD_IN_PLACE:
860 case RAIDFRAME_CHECK_PARITY:
861 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
862 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
863 case RAIDFRAME_CHECK_COPYBACK_STATUS:
864 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
865 case RAIDFRAME_SET_AUTOCONFIG:
866 case RAIDFRAME_SET_ROOT:
867 case RAIDFRAME_DELETE_COMPONENT:
868 case RAIDFRAME_INCORPORATE_HOT_SPARE:
869 if ((rs->sc_flags & RAIDF_INITED) == 0)
870 return (ENXIO);
871 }
872
873 switch (cmd) {
874
875 /* configure the system */
876 case RAIDFRAME_CONFIGURE:
877
878 if (raidPtr->valid) {
879 /* There is a valid RAID set running on this unit! */
880 printf("raid%d: Device already configured!\n",unit);
881 return(EINVAL);
882 }
883
884 /* copy-in the configuration information */
885 /* data points to a pointer to the configuration structure */
886
887 u_cfg = *((RF_Config_t **) data);
888 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
889 if (k_cfg == NULL) {
890 return (ENOMEM);
891 }
892 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
893 if (retcode) {
894 RF_Free(k_cfg, sizeof(RF_Config_t));
895 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
896 retcode));
897 return (retcode);
898 }
899 /* allocate a buffer for the layout-specific data, and copy it
900 * in */
901 if (k_cfg->layoutSpecificSize) {
902 if (k_cfg->layoutSpecificSize > 10000) {
903 /* sanity check */
904 RF_Free(k_cfg, sizeof(RF_Config_t));
905 return (EINVAL);
906 }
907 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
908 (u_char *));
909 if (specific_buf == NULL) {
910 RF_Free(k_cfg, sizeof(RF_Config_t));
911 return (ENOMEM);
912 }
913 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
914 k_cfg->layoutSpecificSize);
915 if (retcode) {
916 RF_Free(k_cfg, sizeof(RF_Config_t));
917 RF_Free(specific_buf,
918 k_cfg->layoutSpecificSize);
919 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
920 retcode));
921 return (retcode);
922 }
923 } else
924 specific_buf = NULL;
925 k_cfg->layoutSpecific = specific_buf;
926
927 /* should do some kind of sanity check on the configuration.
928 * Store the sum of all the bytes in the last byte? */
929
930 /* configure the system */
931
932 /*
933 * Clear the entire RAID descriptor, just to make sure
934 * there is no stale data left in the case of a
935 * reconfiguration
936 */
937 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
938 raidPtr->raidid = unit;
939
940 retcode = rf_Configure(raidPtr, k_cfg, NULL);
941
942 if (retcode == 0) {
943
944 /* allow this many simultaneous IO's to
945 this RAID device */
946 raidPtr->openings = RAIDOUTSTANDING;
947
948 raidinit(raidPtr);
949 rf_markalldirty(raidPtr);
950 }
951 /* free the buffers. No return code here. */
952 if (k_cfg->layoutSpecificSize) {
953 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
954 }
955 RF_Free(k_cfg, sizeof(RF_Config_t));
956
957 return (retcode);
958
959 /* shutdown the system */
960 case RAIDFRAME_SHUTDOWN:
961
962 if ((error = raidlock(rs)) != 0)
963 return (error);
964
965 /*
966 * If somebody has a partition mounted, we shouldn't
967 * shutdown.
968 */
969
970 part = DISKPART(dev);
971 pmask = (1 << part);
972 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
973 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
974 (rs->sc_dkdev.dk_copenmask & pmask))) {
975 raidunlock(rs);
976 return (EBUSY);
977 }
978
979 retcode = rf_Shutdown(raidPtr);
980
981 /* It's no longer initialized... */
982 rs->sc_flags &= ~RAIDF_INITED;
983
984 /* Detach the disk. */
985 pseudo_disk_detach(&rs->sc_dkdev);
986
987 raidunlock(rs);
988
989 return (retcode);
990 case RAIDFRAME_GET_COMPONENT_LABEL:
991 clabel_ptr = (RF_ComponentLabel_t **) data;
992 /* need to read the component label for the disk indicated
993 by row,column in clabel */
994
995 /* For practice, let's get it directly fromdisk, rather
996 than from the in-core copy */
997 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
998 (RF_ComponentLabel_t *));
999 if (clabel == NULL)
1000 return (ENOMEM);
1001
1002 retcode = copyin( *clabel_ptr, clabel,
1003 sizeof(RF_ComponentLabel_t));
1004
1005 if (retcode) {
1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1007 return(retcode);
1008 }
1009
1010 clabel->row = 0; /* Don't allow looking at anything else.*/
1011
1012 column = clabel->column;
1013
1014 if ((column < 0) || (column >= raidPtr->numCol +
1015 raidPtr->numSpare)) {
1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1017 return(EINVAL);
1018 }
1019
1020 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1021 raidPtr->raid_cinfo[column].ci_vp,
1022 clabel );
1023
1024 if (retcode == 0) {
1025 retcode = copyout(clabel, *clabel_ptr,
1026 sizeof(RF_ComponentLabel_t));
1027 }
1028 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1029 return (retcode);
1030
1031 case RAIDFRAME_SET_COMPONENT_LABEL:
1032 clabel = (RF_ComponentLabel_t *) data;
1033
1034 /* XXX check the label for valid stuff... */
1035 /* Note that some things *should not* get modified --
1036 the user should be re-initing the labels instead of
1037 trying to patch things.
1038 */
1039
1040 raidid = raidPtr->raidid;
1041 #if DEBUG
1042 printf("raid%d: Got component label:\n", raidid);
1043 printf("raid%d: Version: %d\n", raidid, clabel->version);
1044 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1045 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1046 printf("raid%d: Column: %d\n", raidid, clabel->column);
1047 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1048 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1049 printf("raid%d: Status: %d\n", raidid, clabel->status);
1050 #endif
1051 clabel->row = 0;
1052 column = clabel->column;
1053
1054 if ((column < 0) || (column >= raidPtr->numCol)) {
1055 return(EINVAL);
1056 }
1057
1058 /* XXX this isn't allowed to do anything for now :-) */
1059
1060 /* XXX and before it is, we need to fill in the rest
1061 of the fields!?!?!?! */
1062 #if 0
1063 raidwrite_component_label(
1064 raidPtr->Disks[column].dev,
1065 raidPtr->raid_cinfo[column].ci_vp,
1066 clabel );
1067 #endif
1068 return (0);
1069
1070 case RAIDFRAME_INIT_LABELS:
1071 clabel = (RF_ComponentLabel_t *) data;
1072 /*
1073 we only want the serial number from
1074 the above. We get all the rest of the information
1075 from the config that was used to create this RAID
1076 set.
1077 */
1078
1079 raidPtr->serial_number = clabel->serial_number;
1080
1081 raid_init_component_label(raidPtr, &ci_label);
1082 ci_label.serial_number = clabel->serial_number;
1083 ci_label.row = 0; /* we dont' pretend to support more */
1084
1085 for(column=0;column<raidPtr->numCol;column++) {
1086 diskPtr = &raidPtr->Disks[column];
1087 if (!RF_DEAD_DISK(diskPtr->status)) {
1088 ci_label.partitionSize = diskPtr->partitionSize;
1089 ci_label.column = column;
1090 raidwrite_component_label(
1091 raidPtr->Disks[column].dev,
1092 raidPtr->raid_cinfo[column].ci_vp,
1093 &ci_label );
1094 }
1095 }
1096
1097 return (retcode);
1098 case RAIDFRAME_SET_AUTOCONFIG:
1099 d = rf_set_autoconfig(raidPtr, *(int *) data);
1100 printf("raid%d: New autoconfig value is: %d\n",
1101 raidPtr->raidid, d);
1102 *(int *) data = d;
1103 return (retcode);
1104
1105 case RAIDFRAME_SET_ROOT:
1106 d = rf_set_rootpartition(raidPtr, *(int *) data);
1107 printf("raid%d: New rootpartition value is: %d\n",
1108 raidPtr->raidid, d);
1109 *(int *) data = d;
1110 return (retcode);
1111
1112 /* initialize all parity */
1113 case RAIDFRAME_REWRITEPARITY:
1114
1115 if (raidPtr->Layout.map->faultsTolerated == 0) {
1116 /* Parity for RAID 0 is trivially correct */
1117 raidPtr->parity_good = RF_RAID_CLEAN;
1118 return(0);
1119 }
1120
1121 if (raidPtr->parity_rewrite_in_progress == 1) {
1122 /* Re-write is already in progress! */
1123 return(EINVAL);
1124 }
1125
1126 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1127 rf_RewriteParityThread,
1128 raidPtr,"raid_parity");
1129 return (retcode);
1130
1131
1132 case RAIDFRAME_ADD_HOT_SPARE:
1133 sparePtr = (RF_SingleComponent_t *) data;
1134 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1135 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1136 return(retcode);
1137
1138 case RAIDFRAME_REMOVE_HOT_SPARE:
1139 return(retcode);
1140
1141 case RAIDFRAME_DELETE_COMPONENT:
1142 componentPtr = (RF_SingleComponent_t *)data;
1143 memcpy( &component, componentPtr,
1144 sizeof(RF_SingleComponent_t));
1145 retcode = rf_delete_component(raidPtr, &component);
1146 return(retcode);
1147
1148 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1149 componentPtr = (RF_SingleComponent_t *)data;
1150 memcpy( &component, componentPtr,
1151 sizeof(RF_SingleComponent_t));
1152 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1153 return(retcode);
1154
1155 case RAIDFRAME_REBUILD_IN_PLACE:
1156
1157 if (raidPtr->Layout.map->faultsTolerated == 0) {
1158 /* Can't do this on a RAID 0!! */
1159 return(EINVAL);
1160 }
1161
1162 if (raidPtr->recon_in_progress == 1) {
1163 /* a reconstruct is already in progress! */
1164 return(EINVAL);
1165 }
1166
1167 componentPtr = (RF_SingleComponent_t *) data;
1168 memcpy( &component, componentPtr,
1169 sizeof(RF_SingleComponent_t));
1170 component.row = 0; /* we don't support any more */
1171 column = component.column;
1172
1173 if ((column < 0) || (column >= raidPtr->numCol)) {
1174 return(EINVAL);
1175 }
1176
1177 RF_LOCK_MUTEX(raidPtr->mutex);
1178 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1179 (raidPtr->numFailures > 0)) {
1180 /* XXX 0 above shouldn't be constant!!! */
1181 /* some component other than this has failed.
1182 Let's not make things worse than they already
1183 are... */
1184 printf("raid%d: Unable to reconstruct to disk at:\n",
1185 raidPtr->raidid);
1186 printf("raid%d: Col: %d Too many failures.\n",
1187 raidPtr->raidid, column);
1188 RF_UNLOCK_MUTEX(raidPtr->mutex);
1189 return (EINVAL);
1190 }
1191 if (raidPtr->Disks[column].status ==
1192 rf_ds_reconstructing) {
1193 printf("raid%d: Unable to reconstruct to disk at:\n",
1194 raidPtr->raidid);
1195 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1196
1197 RF_UNLOCK_MUTEX(raidPtr->mutex);
1198 return (EINVAL);
1199 }
1200 if (raidPtr->Disks[column].status == rf_ds_spared) {
1201 RF_UNLOCK_MUTEX(raidPtr->mutex);
1202 return (EINVAL);
1203 }
1204 RF_UNLOCK_MUTEX(raidPtr->mutex);
1205
1206 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1207 if (rrcopy == NULL)
1208 return(ENOMEM);
1209
1210 rrcopy->raidPtr = (void *) raidPtr;
1211 rrcopy->col = column;
1212
1213 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1214 rf_ReconstructInPlaceThread,
1215 rrcopy,"raid_reconip");
1216 return(retcode);
1217
1218 case RAIDFRAME_GET_INFO:
1219 if (!raidPtr->valid)
1220 return (ENODEV);
1221 ucfgp = (RF_DeviceConfig_t **) data;
1222 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1223 (RF_DeviceConfig_t *));
1224 if (d_cfg == NULL)
1225 return (ENOMEM);
1226 d_cfg->rows = 1; /* there is only 1 row now */
1227 d_cfg->cols = raidPtr->numCol;
1228 d_cfg->ndevs = raidPtr->numCol;
1229 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1230 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1231 return (ENOMEM);
1232 }
1233 d_cfg->nspares = raidPtr->numSpare;
1234 if (d_cfg->nspares >= RF_MAX_DISKS) {
1235 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1236 return (ENOMEM);
1237 }
1238 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1239 d = 0;
1240 for (j = 0; j < d_cfg->cols; j++) {
1241 d_cfg->devs[d] = raidPtr->Disks[j];
1242 d++;
1243 }
1244 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1245 d_cfg->spares[i] = raidPtr->Disks[j];
1246 }
1247 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1248 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1249
1250 return (retcode);
1251
1252 case RAIDFRAME_CHECK_PARITY:
1253 *(int *) data = raidPtr->parity_good;
1254 return (0);
1255
1256 case RAIDFRAME_RESET_ACCTOTALS:
1257 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1258 return (0);
1259
1260 case RAIDFRAME_GET_ACCTOTALS:
1261 totals = (RF_AccTotals_t *) data;
1262 *totals = raidPtr->acc_totals;
1263 return (0);
1264
1265 case RAIDFRAME_KEEP_ACCTOTALS:
1266 raidPtr->keep_acc_totals = *(int *)data;
1267 return (0);
1268
1269 case RAIDFRAME_GET_SIZE:
1270 *(int *) data = raidPtr->totalSectors;
1271 return (0);
1272
1273 /* fail a disk & optionally start reconstruction */
1274 case RAIDFRAME_FAIL_DISK:
1275
1276 if (raidPtr->Layout.map->faultsTolerated == 0) {
1277 /* Can't do this on a RAID 0!! */
1278 return(EINVAL);
1279 }
1280
1281 rr = (struct rf_recon_req *) data;
1282 rr->row = 0;
1283 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1284 return (EINVAL);
1285
1286
1287 RF_LOCK_MUTEX(raidPtr->mutex);
1288 if (raidPtr->status == rf_rs_reconstructing) {
1289 /* you can't fail a disk while we're reconstructing! */
1290 /* XXX wrong for RAID6 */
1291 RF_UNLOCK_MUTEX(raidPtr->mutex);
1292 return (EINVAL);
1293 }
1294 if ((raidPtr->Disks[rr->col].status ==
1295 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1296 /* some other component has failed. Let's not make
1297 things worse. XXX wrong for RAID6 */
1298 RF_UNLOCK_MUTEX(raidPtr->mutex);
1299 return (EINVAL);
1300 }
1301 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1302 /* Can't fail a spared disk! */
1303 RF_UNLOCK_MUTEX(raidPtr->mutex);
1304 return (EINVAL);
1305 }
1306 RF_UNLOCK_MUTEX(raidPtr->mutex);
1307
1308 /* make a copy of the recon request so that we don't rely on
1309 * the user's buffer */
1310 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1311 if (rrcopy == NULL)
1312 return(ENOMEM);
1313 memcpy(rrcopy, rr, sizeof(*rr));
1314 rrcopy->raidPtr = (void *) raidPtr;
1315
1316 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1317 rf_ReconThread,
1318 rrcopy,"raid_recon");
1319 return (0);
1320
1321 /* invoke a copyback operation after recon on whatever disk
1322 * needs it, if any */
1323 case RAIDFRAME_COPYBACK:
1324
1325 if (raidPtr->Layout.map->faultsTolerated == 0) {
1326 /* This makes no sense on a RAID 0!! */
1327 return(EINVAL);
1328 }
1329
1330 if (raidPtr->copyback_in_progress == 1) {
1331 /* Copyback is already in progress! */
1332 return(EINVAL);
1333 }
1334
1335 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1336 rf_CopybackThread,
1337 raidPtr,"raid_copyback");
1338 return (retcode);
1339
1340 /* return the percentage completion of reconstruction */
1341 case RAIDFRAME_CHECK_RECON_STATUS:
1342 if (raidPtr->Layout.map->faultsTolerated == 0) {
1343 /* This makes no sense on a RAID 0, so tell the
1344 user it's done. */
1345 *(int *) data = 100;
1346 return(0);
1347 }
1348 if (raidPtr->status != rf_rs_reconstructing)
1349 *(int *) data = 100;
1350 else {
1351 if (raidPtr->reconControl->numRUsTotal > 0) {
1352 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1353 } else {
1354 *(int *) data = 0;
1355 }
1356 }
1357 return (0);
1358 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1359 progressInfoPtr = (RF_ProgressInfo_t **) data;
1360 if (raidPtr->status != rf_rs_reconstructing) {
1361 progressInfo.remaining = 0;
1362 progressInfo.completed = 100;
1363 progressInfo.total = 100;
1364 } else {
1365 progressInfo.total =
1366 raidPtr->reconControl->numRUsTotal;
1367 progressInfo.completed =
1368 raidPtr->reconControl->numRUsComplete;
1369 progressInfo.remaining = progressInfo.total -
1370 progressInfo.completed;
1371 }
1372 retcode = copyout(&progressInfo, *progressInfoPtr,
1373 sizeof(RF_ProgressInfo_t));
1374 return (retcode);
1375
1376 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1377 if (raidPtr->Layout.map->faultsTolerated == 0) {
1378 /* This makes no sense on a RAID 0, so tell the
1379 user it's done. */
1380 *(int *) data = 100;
1381 return(0);
1382 }
1383 if (raidPtr->parity_rewrite_in_progress == 1) {
1384 *(int *) data = 100 *
1385 raidPtr->parity_rewrite_stripes_done /
1386 raidPtr->Layout.numStripe;
1387 } else {
1388 *(int *) data = 100;
1389 }
1390 return (0);
1391
1392 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1393 progressInfoPtr = (RF_ProgressInfo_t **) data;
1394 if (raidPtr->parity_rewrite_in_progress == 1) {
1395 progressInfo.total = raidPtr->Layout.numStripe;
1396 progressInfo.completed =
1397 raidPtr->parity_rewrite_stripes_done;
1398 progressInfo.remaining = progressInfo.total -
1399 progressInfo.completed;
1400 } else {
1401 progressInfo.remaining = 0;
1402 progressInfo.completed = 100;
1403 progressInfo.total = 100;
1404 }
1405 retcode = copyout(&progressInfo, *progressInfoPtr,
1406 sizeof(RF_ProgressInfo_t));
1407 return (retcode);
1408
1409 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1410 if (raidPtr->Layout.map->faultsTolerated == 0) {
1411 /* This makes no sense on a RAID 0 */
1412 *(int *) data = 100;
1413 return(0);
1414 }
1415 if (raidPtr->copyback_in_progress == 1) {
1416 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1417 raidPtr->Layout.numStripe;
1418 } else {
1419 *(int *) data = 100;
1420 }
1421 return (0);
1422
1423 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1424 progressInfoPtr = (RF_ProgressInfo_t **) data;
1425 if (raidPtr->copyback_in_progress == 1) {
1426 progressInfo.total = raidPtr->Layout.numStripe;
1427 progressInfo.completed =
1428 raidPtr->copyback_stripes_done;
1429 progressInfo.remaining = progressInfo.total -
1430 progressInfo.completed;
1431 } else {
1432 progressInfo.remaining = 0;
1433 progressInfo.completed = 100;
1434 progressInfo.total = 100;
1435 }
1436 retcode = copyout(&progressInfo, *progressInfoPtr,
1437 sizeof(RF_ProgressInfo_t));
1438 return (retcode);
1439
1440 /* the sparetable daemon calls this to wait for the kernel to
1441 * need a spare table. this ioctl does not return until a
1442 * spare table is needed. XXX -- calling mpsleep here in the
1443 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1444 * -- I should either compute the spare table in the kernel,
1445 * or have a different -- XXX XXX -- interface (a different
1446 * character device) for delivering the table -- XXX */
1447 #if 0
1448 case RAIDFRAME_SPARET_WAIT:
1449 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1450 while (!rf_sparet_wait_queue)
1451 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1452 waitreq = rf_sparet_wait_queue;
1453 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1454 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1455
1456 /* structure assignment */
1457 *((RF_SparetWait_t *) data) = *waitreq;
1458
1459 RF_Free(waitreq, sizeof(*waitreq));
1460 return (0);
1461
1462 /* wakes up a process waiting on SPARET_WAIT and puts an error
1463 * code in it that will cause the dameon to exit */
1464 case RAIDFRAME_ABORT_SPARET_WAIT:
1465 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1466 waitreq->fcol = -1;
1467 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1468 waitreq->next = rf_sparet_wait_queue;
1469 rf_sparet_wait_queue = waitreq;
1470 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1471 wakeup(&rf_sparet_wait_queue);
1472 return (0);
1473
1474 /* used by the spare table daemon to deliver a spare table
1475 * into the kernel */
1476 case RAIDFRAME_SEND_SPARET:
1477
1478 /* install the spare table */
1479 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1480
1481 /* respond to the requestor. the return status of the spare
1482 * table installation is passed in the "fcol" field */
1483 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1484 waitreq->fcol = retcode;
1485 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1486 waitreq->next = rf_sparet_resp_queue;
1487 rf_sparet_resp_queue = waitreq;
1488 wakeup(&rf_sparet_resp_queue);
1489 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1490
1491 return (retcode);
1492 #endif
1493
1494 default:
1495 break; /* fall through to the os-specific code below */
1496
1497 }
1498
1499 if (!raidPtr->valid)
1500 return (EINVAL);
1501
1502 /*
1503 * Add support for "regular" device ioctls here.
1504 */
1505
1506 switch (cmd) {
1507 case DIOCGDINFO:
1508 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1509 break;
1510 #ifdef __HAVE_OLD_DISKLABEL
1511 case ODIOCGDINFO:
1512 newlabel = *(rs->sc_dkdev.dk_label);
1513 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1514 return ENOTTY;
1515 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1516 break;
1517 #endif
1518
1519 case DIOCGPART:
1520 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1521 ((struct partinfo *) data)->part =
1522 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1523 break;
1524
1525 case DIOCWDINFO:
1526 case DIOCSDINFO:
1527 #ifdef __HAVE_OLD_DISKLABEL
1528 case ODIOCWDINFO:
1529 case ODIOCSDINFO:
1530 #endif
1531 {
1532 struct disklabel *lp;
1533 #ifdef __HAVE_OLD_DISKLABEL
1534 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1535 memset(&newlabel, 0, sizeof newlabel);
1536 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1537 lp = &newlabel;
1538 } else
1539 #endif
1540 lp = (struct disklabel *)data;
1541
1542 if ((error = raidlock(rs)) != 0)
1543 return (error);
1544
1545 rs->sc_flags |= RAIDF_LABELLING;
1546
1547 error = setdisklabel(rs->sc_dkdev.dk_label,
1548 lp, 0, rs->sc_dkdev.dk_cpulabel);
1549 if (error == 0) {
1550 if (cmd == DIOCWDINFO
1551 #ifdef __HAVE_OLD_DISKLABEL
1552 || cmd == ODIOCWDINFO
1553 #endif
1554 )
1555 error = writedisklabel(RAIDLABELDEV(dev),
1556 raidstrategy, rs->sc_dkdev.dk_label,
1557 rs->sc_dkdev.dk_cpulabel);
1558 }
1559 rs->sc_flags &= ~RAIDF_LABELLING;
1560
1561 raidunlock(rs);
1562
1563 if (error)
1564 return (error);
1565 break;
1566 }
1567
1568 case DIOCWLABEL:
1569 if (*(int *) data != 0)
1570 rs->sc_flags |= RAIDF_WLABEL;
1571 else
1572 rs->sc_flags &= ~RAIDF_WLABEL;
1573 break;
1574
1575 case DIOCGDEFLABEL:
1576 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1577 break;
1578
1579 #ifdef __HAVE_OLD_DISKLABEL
1580 case ODIOCGDEFLABEL:
1581 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1582 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1583 return ENOTTY;
1584 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1585 break;
1586 #endif
1587
1588 default:
1589 retcode = ENOTTY;
1590 }
1591 return (retcode);
1592
1593 }
1594
1595
1596 /* raidinit -- complete the rest of the initialization for the
1597 RAIDframe device. */
1598
1599
1600 static void
1601 raidinit(RF_Raid_t *raidPtr)
1602 {
1603 struct raid_softc *rs;
1604 int unit;
1605
1606 unit = raidPtr->raidid;
1607
1608 rs = &raid_softc[unit];
1609
1610 /* XXX should check return code first... */
1611 rs->sc_flags |= RAIDF_INITED;
1612
1613 /* XXX doesn't check bounds. */
1614 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1615
1616 rs->sc_dkdev.dk_name = rs->sc_xname;
1617
1618 /* disk_attach actually creates space for the CPU disklabel, among
1619 * other things, so it's critical to call this *BEFORE* we try putzing
1620 * with disklabels. */
1621
1622 pseudo_disk_attach(&rs->sc_dkdev);
1623
1624 /* XXX There may be a weird interaction here between this, and
1625 * protectedSectors, as used in RAIDframe. */
1626
1627 rs->sc_size = raidPtr->totalSectors;
1628 }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * Wake up the spare-table daemon, ask it for a spare table, and wait
 * for the answer.
 *
 * The request is pushed on the head of rf_sparet_wait_queue and any
 * sleeper on that queue is woken.  We then sleep until something shows
 * up on rf_sparet_resp_queue, pop the first response, and return its
 * fcol field.  The response entry (which is not the request the caller
 * passed in) is freed here.
 *
 * XXX
 * The queue entries should be tagged with the raidPtr so that, in the
 * extremely rare case that two recons happen at once, we know for
 * which device a spare table is being requested.
 * XXX
 *
 * XXX This code is not currently used.  GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	RF_SparetWait_t *resp;
	int status;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);

	/* Queue the request and poke whoever is waiting for one. */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): the historical remark at this spot reads "mpsleep
	 * unlocks the mutex", but the call below is a plain tsleep() and
	 * nothing visible here drops rf_sparet_wait_mutex while sleeping --
	 * confirm before this code is ever enabled.
	 */
	while (rf_sparet_resp_queue == NULL) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* Pop the first response off the queue. */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	status = resp->fcol;
	/* this is not the same entry as the one the caller alloc'd */
	RF_Free(resp, sizeof(*resp));

	return (status);
}
#endif
1664
1665 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1666 * bp & passes it down.
1667 * any calls originating in the kernel must use non-blocking I/O
1668 * do some extra sanity checking to return "appropriate" error values for
1669 * certain conditions (to make some standard utilities work)
1670 *
1671 * Formerly known as: rf_DoAccessKernel
1672 */
1673 void
1674 raidstart(RF_Raid_t *raidPtr)
1675 {
1676 RF_SectorCount_t num_blocks, pb, sum;
1677 RF_RaidAddr_t raid_addr;
1678 struct partition *pp;
1679 daddr_t blocknum;
1680 int unit;
1681 struct raid_softc *rs;
1682 int do_async;
1683 struct buf *bp;
1684 int rc;
1685
1686 unit = raidPtr->raidid;
1687 rs = &raid_softc[unit];
1688
1689 /* quick check to see if anything has died recently */
1690 RF_LOCK_MUTEX(raidPtr->mutex);
1691 if (raidPtr->numNewFailures > 0) {
1692 RF_UNLOCK_MUTEX(raidPtr->mutex);
1693 rf_update_component_labels(raidPtr,
1694 RF_NORMAL_COMPONENT_UPDATE);
1695 RF_LOCK_MUTEX(raidPtr->mutex);
1696 raidPtr->numNewFailures--;
1697 }
1698
1699 /* Check to see if we're at the limit... */
1700 while (raidPtr->openings > 0) {
1701 RF_UNLOCK_MUTEX(raidPtr->mutex);
1702
1703 /* get the next item, if any, from the queue */
1704 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
1705 /* nothing more to do */
1706 return;
1707 }
1708
1709 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1710 * partition.. Need to make it absolute to the underlying
1711 * device.. */
1712
1713 blocknum = bp->b_blkno;
1714 if (DISKPART(bp->b_dev) != RAW_PART) {
1715 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1716 blocknum += pp->p_offset;
1717 }
1718
1719 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1720 (int) blocknum));
1721
1722 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1723 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1724
1725 /* *THIS* is where we adjust what block we're going to...
1726 * but DO NOT TOUCH bp->b_blkno!!! */
1727 raid_addr = blocknum;
1728
1729 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1730 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1731 sum = raid_addr + num_blocks + pb;
1732 if (1 || rf_debugKernelAccess) {
1733 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1734 (int) raid_addr, (int) sum, (int) num_blocks,
1735 (int) pb, (int) bp->b_resid));
1736 }
1737 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1738 || (sum < num_blocks) || (sum < pb)) {
1739 bp->b_error = ENOSPC;
1740 bp->b_flags |= B_ERROR;
1741 bp->b_resid = bp->b_bcount;
1742 biodone(bp);
1743 RF_LOCK_MUTEX(raidPtr->mutex);
1744 continue;
1745 }
1746 /*
1747 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1748 */
1749
1750 if (bp->b_bcount & raidPtr->sectorMask) {
1751 bp->b_error = EINVAL;
1752 bp->b_flags |= B_ERROR;
1753 bp->b_resid = bp->b_bcount;
1754 biodone(bp);
1755 RF_LOCK_MUTEX(raidPtr->mutex);
1756 continue;
1757
1758 }
1759 db1_printf(("Calling DoAccess..\n"));
1760
1761
1762 RF_LOCK_MUTEX(raidPtr->mutex);
1763 raidPtr->openings--;
1764 RF_UNLOCK_MUTEX(raidPtr->mutex);
1765
1766 /*
1767 * Everything is async.
1768 */
1769 do_async = 1;
1770
1771 disk_busy(&rs->sc_dkdev);
1772
1773 /* XXX we're still at splbio() here... do we *really*
1774 need to be? */
1775
1776 /* don't ever condition on bp->b_flags & B_WRITE.
1777 * always condition on B_READ instead */
1778
1779 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1780 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1781 do_async, raid_addr, num_blocks,
1782 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1783
1784 if (rc) {
1785 bp->b_error = rc;
1786 bp->b_flags |= B_ERROR;
1787 bp->b_resid = bp->b_bcount;
1788 biodone(bp);
1789 /* continue loop */
1790 }
1791
1792 RF_LOCK_MUTEX(raidPtr->mutex);
1793 }
1794 RF_UNLOCK_MUTEX(raidPtr->mutex);
1795 }
1796
1797
1798
1799
1800 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1801
1802 int
1803 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1804 {
1805 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1806 struct buf *bp;
1807
1808 req->queue = queue;
1809
1810 #if DIAGNOSTIC
1811 if (queue->raidPtr->raidid >= numraid) {
1812 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1813 numraid);
1814 panic("Invalid Unit number in rf_DispatchKernelIO");
1815 }
1816 #endif
1817
1818 bp = req->bp;
1819
1820 switch (req->type) {
1821 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1822 /* XXX need to do something extra here.. */
1823 /* I'm leaving this in, as I've never actually seen it used,
1824 * and I'd like folks to report it... GO */
1825 printf(("WAKEUP CALLED\n"));
1826 queue->numOutstanding++;
1827
1828 bp->b_flags = 0;
1829 bp->b_private = req;
1830
1831 KernelWakeupFunc(bp);
1832 break;
1833
1834 case RF_IO_TYPE_READ:
1835 case RF_IO_TYPE_WRITE:
1836 #if RF_ACC_TRACE > 0
1837 if (req->tracerec) {
1838 RF_ETIMER_START(req->tracerec->timer);
1839 }
1840 #endif
1841 InitBP(bp, queue->rf_cinfo->ci_vp,
1842 op, queue->rf_cinfo->ci_dev,
1843 req->sectorOffset, req->numSector,
1844 req->buf, KernelWakeupFunc, (void *) req,
1845 queue->raidPtr->logBytesPerSector, req->b_proc);
1846
1847 if (rf_debugKernelAccess) {
1848 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1849 (long) bp->b_blkno));
1850 }
1851 queue->numOutstanding++;
1852 queue->last_deq_sector = req->sectorOffset;
1853 /* acc wouldn't have been let in if there were any pending
1854 * reqs at any other priority */
1855 queue->curPriority = req->priority;
1856
1857 db1_printf(("Going for %c to unit %d col %d\n",
1858 req->type, queue->raidPtr->raidid,
1859 queue->col));
1860 db1_printf(("sector %d count %d (%d bytes) %d\n",
1861 (int) req->sectorOffset, (int) req->numSector,
1862 (int) (req->numSector <<
1863 queue->raidPtr->logBytesPerSector),
1864 (int) queue->raidPtr->logBytesPerSector));
1865 VOP_STRATEGY(bp->b_vp, bp);
1866
1867 break;
1868
1869 default:
1870 panic("bad req->type in rf_DispatchKernelIO");
1871 }
1872 db1_printf(("Exiting from DispatchKernelIO\n"));
1873
1874 return (0);
1875 }
1876 /* this is the callback function associated with a I/O invoked from
1877 kernel code.
1878 */
1879 static void
1880 KernelWakeupFunc(struct buf *bp)
1881 {
1882 RF_DiskQueueData_t *req = NULL;
1883 RF_DiskQueue_t *queue;
1884 int s;
1885
1886 s = splbio();
1887 db1_printf(("recovering the request queue:\n"));
1888 req = bp->b_private;
1889
1890 queue = (RF_DiskQueue_t *) req->queue;
1891
1892 #if RF_ACC_TRACE > 0
1893 if (req->tracerec) {
1894 RF_ETIMER_STOP(req->tracerec->timer);
1895 RF_ETIMER_EVAL(req->tracerec->timer);
1896 RF_LOCK_MUTEX(rf_tracing_mutex);
1897 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1898 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1899 req->tracerec->num_phys_ios++;
1900 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1901 }
1902 #endif
1903
1904 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1905 * ballistic, and mark the component as hosed... */
1906
1907 if (bp->b_flags & B_ERROR) {
1908 /* Mark the disk as dead */
1909 /* but only mark it once... */
1910 /* and only if it wouldn't leave this RAID set
1911 completely broken */
1912 if (((queue->raidPtr->Disks[queue->col].status ==
1913 rf_ds_optimal) ||
1914 (queue->raidPtr->Disks[queue->col].status ==
1915 rf_ds_used_spare)) &&
1916 (queue->raidPtr->numFailures <
1917 queue->raidPtr->Layout.map->faultsTolerated)) {
1918 printf("raid%d: IO Error. Marking %s as failed.\n",
1919 queue->raidPtr->raidid,
1920 queue->raidPtr->Disks[queue->col].devname);
1921 queue->raidPtr->Disks[queue->col].status =
1922 rf_ds_failed;
1923 queue->raidPtr->status = rf_rs_degraded;
1924 queue->raidPtr->numFailures++;
1925 queue->raidPtr->numNewFailures++;
1926 } else { /* Disk is already dead... */
1927 /* printf("Disk already marked as dead!\n"); */
1928 }
1929
1930 }
1931
1932 /* Fill in the error value */
1933
1934 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
1935
1936 simple_lock(&queue->raidPtr->iodone_lock);
1937
1938 /* Drop this one on the "finished" queue... */
1939 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
1940
1941 /* Let the raidio thread know there is work to be done. */
1942 wakeup(&(queue->raidPtr->iodone));
1943
1944 simple_unlock(&queue->raidPtr->iodone_lock);
1945
1946 splx(s);
1947 }
1948
1949
1950
1951 /*
1952 * initialize a buf structure for doing an I/O in the kernel.
1953 */
1954 static void
1955 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1956 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
1957 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1958 struct proc *b_proc)
1959 {
1960 /* bp->b_flags = B_PHYS | rw_flag; */
1961 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1962 bp->b_bcount = numSect << logBytesPerSector;
1963 bp->b_bufsize = bp->b_bcount;
1964 bp->b_error = 0;
1965 bp->b_dev = dev;
1966 bp->b_data = bf;
1967 bp->b_blkno = startSect;
1968 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1969 if (bp->b_bcount == 0) {
1970 panic("bp->b_bcount is zero in InitBP!!");
1971 }
1972 bp->b_proc = b_proc;
1973 bp->b_iodone = cbFunc;
1974 bp->b_private = cbArg;
1975 bp->b_vp = b_vp;
1976 if ((bp->b_flags & B_READ) == 0) {
1977 bp->b_vp->v_numoutput++;
1978 }
1979
1980 }
1981
1982 static void
1983 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1984 struct disklabel *lp)
1985 {
1986 memset(lp, 0, sizeof(*lp));
1987
1988 /* fabricate a label... */
1989 lp->d_secperunit = raidPtr->totalSectors;
1990 lp->d_secsize = raidPtr->bytesPerSector;
1991 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1992 lp->d_ntracks = 4 * raidPtr->numCol;
1993 lp->d_ncylinders = raidPtr->totalSectors /
1994 (lp->d_nsectors * lp->d_ntracks);
1995 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1996
1997 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1998 lp->d_type = DTYPE_RAID;
1999 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2000 lp->d_rpm = 3600;
2001 lp->d_interleave = 1;
2002 lp->d_flags = 0;
2003
2004 lp->d_partitions[RAW_PART].p_offset = 0;
2005 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2006 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2007 lp->d_npartitions = RAW_PART + 1;
2008
2009 lp->d_magic = DISKMAGIC;
2010 lp->d_magic2 = DISKMAGIC;
2011 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2012
2013 }
2014 /*
2015 * Read the disklabel from the raid device. If one is not present, fake one
2016 * up.
2017 */
2018 static void
2019 raidgetdisklabel(dev_t dev)
2020 {
2021 int unit = raidunit(dev);
2022 struct raid_softc *rs = &raid_softc[unit];
2023 const char *errstring;
2024 struct disklabel *lp = rs->sc_dkdev.dk_label;
2025 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2026 RF_Raid_t *raidPtr;
2027
2028 db1_printf(("Getting the disklabel...\n"));
2029
2030 memset(clp, 0, sizeof(*clp));
2031
2032 raidPtr = raidPtrs[unit];
2033
2034 raidgetdefaultlabel(raidPtr, rs, lp);
2035
2036 /*
2037 * Call the generic disklabel extraction routine.
2038 */
2039 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2040 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2041 if (errstring)
2042 raidmakedisklabel(rs);
2043 else {
2044 int i;
2045 struct partition *pp;
2046
2047 /*
2048 * Sanity check whether the found disklabel is valid.
2049 *
2050 * This is necessary since total size of the raid device
2051 * may vary when an interleave is changed even though exactly
2052 * same componets are used, and old disklabel may used
2053 * if that is found.
2054 */
2055 if (lp->d_secperunit != rs->sc_size)
2056 printf("raid%d: WARNING: %s: "
2057 "total sector size in disklabel (%d) != "
2058 "the size of raid (%ld)\n", unit, rs->sc_xname,
2059 lp->d_secperunit, (long) rs->sc_size);
2060 for (i = 0; i < lp->d_npartitions; i++) {
2061 pp = &lp->d_partitions[i];
2062 if (pp->p_offset + pp->p_size > rs->sc_size)
2063 printf("raid%d: WARNING: %s: end of partition `%c' "
2064 "exceeds the size of raid (%ld)\n",
2065 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2066 }
2067 }
2068
2069 }
2070 /*
2071 * Take care of things one might want to take care of in the event
2072 * that a disklabel isn't present.
2073 */
2074 static void
2075 raidmakedisklabel(struct raid_softc *rs)
2076 {
2077 struct disklabel *lp = rs->sc_dkdev.dk_label;
2078 db1_printf(("Making a label..\n"));
2079
2080 /*
2081 * For historical reasons, if there's no disklabel present
2082 * the raw partition must be marked FS_BSDFFS.
2083 */
2084
2085 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2086
2087 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2088
2089 lp->d_checksum = dkcksum(lp);
2090 }
2091 /*
2092 * Lookup the provided name in the filesystem. If the file exists,
2093 * is a valid block device, and isn't being used by anyone else,
2094 * set *vpp to the file's vnode.
2095 * You'll find the original of this in ccd.c
2096 */
2097 int
2098 raidlookup(char *path, struct lwp *l, struct vnode **vpp)
2099 {
2100 struct nameidata nd;
2101 struct vnode *vp;
2102 struct proc *p;
2103 struct vattr va;
2104 int error;
2105
2106 if (l == NULL)
2107 return(ESRCH); /* Is ESRCH the best choice? */
2108 p = l->l_proc;
2109
2110 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l);
2111 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2112 return (error);
2113 }
2114 vp = nd.ni_vp;
2115 if (vp->v_usecount > 1) {
2116 VOP_UNLOCK(vp, 0);
2117 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2118 return (EBUSY);
2119 }
2120 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, l)) != 0) {
2121 VOP_UNLOCK(vp, 0);
2122 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2123 return (error);
2124 }
2125 /* XXX: eventually we should handle VREG, too. */
2126 if (va.va_type != VBLK) {
2127 VOP_UNLOCK(vp, 0);
2128 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2129 return (ENOTBLK);
2130 }
2131 VOP_UNLOCK(vp, 0);
2132 *vpp = vp;
2133 return (0);
2134 }
2135 /*
2136 * Wait interruptibly for an exclusive lock.
2137 *
2138 * XXX
2139 * Several drivers do this; it should be abstracted and made MP-safe.
2140 * (Hmm... where have we seen this warning before :-> GO )
2141 */
2142 static int
2143 raidlock(struct raid_softc *rs)
2144 {
2145 int error;
2146
2147 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2148 rs->sc_flags |= RAIDF_WANTED;
2149 if ((error =
2150 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2151 return (error);
2152 }
2153 rs->sc_flags |= RAIDF_LOCKED;
2154 return (0);
2155 }
2156 /*
2157 * Unlock and wake up any waiters.
2158 */
2159 static void
2160 raidunlock(struct raid_softc *rs)
2161 {
2162
2163 rs->sc_flags &= ~RAIDF_LOCKED;
2164 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2165 rs->sc_flags &= ~RAIDF_WANTED;
2166 wakeup(rs);
2167 }
2168 }
2169
2170
/*
 * Byte offset and byte size of the component label area used by
 * raidread_component_label() and raidwrite_component_label().
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		(1 * 1024)	/* bytes */
2173
2174 int
2175 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2176 {
2177 RF_ComponentLabel_t clabel;
2178 raidread_component_label(dev, b_vp, &clabel);
2179 clabel.mod_counter = mod_counter;
2180 clabel.clean = RF_RAID_CLEAN;
2181 raidwrite_component_label(dev, b_vp, &clabel);
2182 return(0);
2183 }
2184
2185
2186 int
2187 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2188 {
2189 RF_ComponentLabel_t clabel;
2190 raidread_component_label(dev, b_vp, &clabel);
2191 clabel.mod_counter = mod_counter;
2192 clabel.clean = RF_RAID_DIRTY;
2193 raidwrite_component_label(dev, b_vp, &clabel);
2194 return(0);
2195 }
2196
2197 /* ARGSUSED */
2198 int
2199 raidread_component_label(dev_t dev, struct vnode *b_vp,
2200 RF_ComponentLabel_t *clabel)
2201 {
2202 struct buf *bp;
2203 const struct bdevsw *bdev;
2204 int error;
2205
2206 /* XXX should probably ensure that we don't try to do this if
2207 someone has changed rf_protected_sectors. */
2208
2209 if (b_vp == NULL) {
2210 /* For whatever reason, this component is not valid.
2211 Don't try to read a component label from it. */
2212 return(EINVAL);
2213 }
2214
2215 /* get a block of the appropriate size... */
2216 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2217 bp->b_dev = dev;
2218
2219 /* get our ducks in a row for the read */
2220 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2221 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2222 bp->b_flags |= B_READ;
2223 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2224
2225 bdev = bdevsw_lookup(bp->b_dev);
2226 if (bdev == NULL)
2227 return (ENXIO);
2228 (*bdev->d_strategy)(bp);
2229
2230 error = biowait(bp);
2231
2232 if (!error) {
2233 memcpy(clabel, bp->b_data,
2234 sizeof(RF_ComponentLabel_t));
2235 }
2236
2237 brelse(bp);
2238 return(error);
2239 }
2240 /* ARGSUSED */
2241 int
2242 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2243 RF_ComponentLabel_t *clabel)
2244 {
2245 struct buf *bp;
2246 const struct bdevsw *bdev;
2247 int error;
2248
2249 /* get a block of the appropriate size... */
2250 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2251 bp->b_dev = dev;
2252
2253 /* get our ducks in a row for the write */
2254 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2255 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2256 bp->b_flags |= B_WRITE;
2257 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2258
2259 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2260
2261 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2262
2263 bdev = bdevsw_lookup(bp->b_dev);
2264 if (bdev == NULL)
2265 return (ENXIO);
2266 (*bdev->d_strategy)(bp);
2267 error = biowait(bp);
2268 brelse(bp);
2269 if (error) {
2270 #if 1
2271 printf("Failed to write RAID component info!\n");
2272 #endif
2273 }
2274
2275 return(error);
2276 }
2277
2278 void
2279 rf_markalldirty(RF_Raid_t *raidPtr)
2280 {
2281 RF_ComponentLabel_t clabel;
2282 int sparecol;
2283 int c;
2284 int j;
2285 int scol = -1;
2286
2287 raidPtr->mod_counter++;
2288 for (c = 0; c < raidPtr->numCol; c++) {
2289 /* we don't want to touch (at all) a disk that has
2290 failed */
2291 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2292 raidread_component_label(
2293 raidPtr->Disks[c].dev,
2294 raidPtr->raid_cinfo[c].ci_vp,
2295 &clabel);
2296 if (clabel.status == rf_ds_spared) {
2297 /* XXX do something special...
2298 but whatever you do, don't
2299 try to access it!! */
2300 } else {
2301 raidmarkdirty(
2302 raidPtr->Disks[c].dev,
2303 raidPtr->raid_cinfo[c].ci_vp,
2304 raidPtr->mod_counter);
2305 }
2306 }
2307 }
2308
2309 for( c = 0; c < raidPtr->numSpare ; c++) {
2310 sparecol = raidPtr->numCol + c;
2311 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2312 /*
2313
2314 we claim this disk is "optimal" if it's
2315 rf_ds_used_spare, as that means it should be
2316 directly substitutable for the disk it replaced.
2317 We note that too...
2318
2319 */
2320
2321 for(j=0;j<raidPtr->numCol;j++) {
2322 if (raidPtr->Disks[j].spareCol == sparecol) {
2323 scol = j;
2324 break;
2325 }
2326 }
2327
2328 raidread_component_label(
2329 raidPtr->Disks[sparecol].dev,
2330 raidPtr->raid_cinfo[sparecol].ci_vp,
2331 &clabel);
2332 /* make sure status is noted */
2333
2334 raid_init_component_label(raidPtr, &clabel);
2335
2336 clabel.row = 0;
2337 clabel.column = scol;
2338 /* Note: we *don't* change status from rf_ds_used_spare
2339 to rf_ds_optimal */
2340 /* clabel.status = rf_ds_optimal; */
2341
2342 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2343 raidPtr->raid_cinfo[sparecol].ci_vp,
2344 raidPtr->mod_counter);
2345 }
2346 }
2347 }
2348
2349
2350 void
2351 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2352 {
2353 RF_ComponentLabel_t clabel;
2354 int sparecol;
2355 int c;
2356 int j;
2357 int scol;
2358
2359 scol = -1;
2360
2361 /* XXX should do extra checks to make sure things really are clean,
2362 rather than blindly setting the clean bit... */
2363
2364 raidPtr->mod_counter++;
2365
2366 for (c = 0; c < raidPtr->numCol; c++) {
2367 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2368 raidread_component_label(
2369 raidPtr->Disks[c].dev,
2370 raidPtr->raid_cinfo[c].ci_vp,
2371 &clabel);
2372 /* make sure status is noted */
2373 clabel.status = rf_ds_optimal;
2374
2375 /* bump the counter */
2376 clabel.mod_counter = raidPtr->mod_counter;
2377
2378 raidwrite_component_label(
2379 raidPtr->Disks[c].dev,
2380 raidPtr->raid_cinfo[c].ci_vp,
2381 &clabel);
2382 if (final == RF_FINAL_COMPONENT_UPDATE) {
2383 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2384 raidmarkclean(
2385 raidPtr->Disks[c].dev,
2386 raidPtr->raid_cinfo[c].ci_vp,
2387 raidPtr->mod_counter);
2388 }
2389 }
2390 }
2391 /* else we don't touch it.. */
2392 }
2393
2394 for( c = 0; c < raidPtr->numSpare ; c++) {
2395 sparecol = raidPtr->numCol + c;
2396 /* Need to ensure that the reconstruct actually completed! */
2397 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2398 /*
2399
2400 we claim this disk is "optimal" if it's
2401 rf_ds_used_spare, as that means it should be
2402 directly substitutable for the disk it replaced.
2403 We note that too...
2404
2405 */
2406
2407 for(j=0;j<raidPtr->numCol;j++) {
2408 if (raidPtr->Disks[j].spareCol == sparecol) {
2409 scol = j;
2410 break;
2411 }
2412 }
2413
2414 /* XXX shouldn't *really* need this... */
2415 raidread_component_label(
2416 raidPtr->Disks[sparecol].dev,
2417 raidPtr->raid_cinfo[sparecol].ci_vp,
2418 &clabel);
2419 /* make sure status is noted */
2420
2421 raid_init_component_label(raidPtr, &clabel);
2422
2423 clabel.mod_counter = raidPtr->mod_counter;
2424 clabel.column = scol;
2425 clabel.status = rf_ds_optimal;
2426
2427 raidwrite_component_label(
2428 raidPtr->Disks[sparecol].dev,
2429 raidPtr->raid_cinfo[sparecol].ci_vp,
2430 &clabel);
2431 if (final == RF_FINAL_COMPONENT_UPDATE) {
2432 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2433 raidmarkclean( raidPtr->Disks[sparecol].dev,
2434 raidPtr->raid_cinfo[sparecol].ci_vp,
2435 raidPtr->mod_counter);
2436 }
2437 }
2438 }
2439 }
2440 }
2441
2442 void
2443 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2444 {
2445 struct proc *p;
2446 struct lwp *l;
2447
2448 p = raidPtr->engine_thread;
2449 l = LIST_FIRST(&p->p_lwps);
2450
2451 if (vp != NULL) {
2452 if (auto_configured == 1) {
2453 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2454 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2455 vput(vp);
2456
2457 } else {
2458 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2459 }
2460 }
2461 }
2462
2463
2464 void
2465 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2466 {
2467 int r,c;
2468 struct vnode *vp;
2469 int acd;
2470
2471
2472 /* We take this opportunity to close the vnodes like we should.. */
2473
2474 for (c = 0; c < raidPtr->numCol; c++) {
2475 vp = raidPtr->raid_cinfo[c].ci_vp;
2476 acd = raidPtr->Disks[c].auto_configured;
2477 rf_close_component(raidPtr, vp, acd);
2478 raidPtr->raid_cinfo[c].ci_vp = NULL;
2479 raidPtr->Disks[c].auto_configured = 0;
2480 }
2481
2482 for (r = 0; r < raidPtr->numSpare; r++) {
2483 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2484 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2485 rf_close_component(raidPtr, vp, acd);
2486 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2487 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2488 }
2489 }
2490
2491
2492 void
2493 rf_ReconThread(struct rf_recon_req *req)
2494 {
2495 int s;
2496 RF_Raid_t *raidPtr;
2497
2498 s = splbio();
2499 raidPtr = (RF_Raid_t *) req->raidPtr;
2500 raidPtr->recon_in_progress = 1;
2501
2502 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2503 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2504
2505 RF_Free(req, sizeof(*req));
2506
2507 raidPtr->recon_in_progress = 0;
2508 splx(s);
2509
2510 /* That's all... */
2511 kthread_exit(0); /* does not return */
2512 }
2513
2514 void
2515 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2516 {
2517 int retcode;
2518 int s;
2519
2520 raidPtr->parity_rewrite_stripes_done = 0;
2521 raidPtr->parity_rewrite_in_progress = 1;
2522 s = splbio();
2523 retcode = rf_RewriteParity(raidPtr);
2524 splx(s);
2525 if (retcode) {
2526 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2527 } else {
2528 /* set the clean bit! If we shutdown correctly,
2529 the clean bit on each component label will get
2530 set */
2531 raidPtr->parity_good = RF_RAID_CLEAN;
2532 }
2533 raidPtr->parity_rewrite_in_progress = 0;
2534
2535 /* Anyone waiting for us to stop? If so, inform them... */
2536 if (raidPtr->waitShutdown) {
2537 wakeup(&raidPtr->parity_rewrite_in_progress);
2538 }
2539
2540 /* That's all... */
2541 kthread_exit(0); /* does not return */
2542 }
2543
2544
2545 void
2546 rf_CopybackThread(RF_Raid_t *raidPtr)
2547 {
2548 int s;
2549
2550 raidPtr->copyback_in_progress = 1;
2551 s = splbio();
2552 rf_CopybackReconstructedData(raidPtr);
2553 splx(s);
2554 raidPtr->copyback_in_progress = 0;
2555
2556 /* That's all... */
2557 kthread_exit(0); /* does not return */
2558 }
2559
2560
2561 void
2562 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2563 {
2564 int s;
2565 RF_Raid_t *raidPtr;
2566
2567 s = splbio();
2568 raidPtr = req->raidPtr;
2569 raidPtr->recon_in_progress = 1;
2570 rf_ReconstructInPlace(raidPtr, req->col);
2571 RF_Free(req, sizeof(*req));
2572 raidPtr->recon_in_progress = 0;
2573 splx(s);
2574
2575 /* That's all... */
2576 kthread_exit(0); /* does not return */
2577 }
2578
2579 RF_AutoConfig_t *
2580 rf_find_raid_components()
2581 {
2582 struct vnode *vp;
2583 struct disklabel label;
2584 struct device *dv;
2585 dev_t dev;
2586 int bmajor;
2587 int error;
2588 int i;
2589 int good_one;
2590 RF_ComponentLabel_t *clabel;
2591 RF_AutoConfig_t *ac_list;
2592 RF_AutoConfig_t *ac;
2593
2594
2595 /* initialize the AutoConfig list */
2596 ac_list = NULL;
2597
2598 /* we begin by trolling through *all* the devices on the system */
2599
2600 for (dv = alldevs.tqh_first; dv != NULL;
2601 dv = dv->dv_list.tqe_next) {
2602
2603 /* we are only interested in disks... */
2604 if (device_class(dv) != DV_DISK)
2605 continue;
2606
2607 /* we don't care about floppies... */
2608 if (device_is_a(dv, "fd")) {
2609 continue;
2610 }
2611
2612 /* we don't care about CD's... */
2613 if (device_is_a(dv, "cd")) {
2614 continue;
2615 }
2616
2617 /* hdfd is the Atari/Hades floppy driver */
2618 if (device_is_a(dv, "hdfd")) {
2619 continue;
2620 }
2621
2622 /* fdisa is the Atari/Milan floppy driver */
2623 if (device_is_a(dv, "fdisa")) {
2624 continue;
2625 }
2626
2627 /* need to find the device_name_to_block_device_major stuff */
2628 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2629
2630 /* get a vnode for the raw partition of this disk */
2631
2632 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
2633 if (bdevvp(dev, &vp))
2634 panic("RAID can't alloc vnode");
2635
2636 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2637
2638 if (error) {
2639 /* "Who cares." Continue looking
2640 for something that exists*/
2641 vput(vp);
2642 continue;
2643 }
2644
2645 /* Ok, the disk exists. Go get the disklabel. */
2646 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2647 if (error) {
2648 /*
2649 * XXX can't happen - open() would
2650 * have errored out (or faked up one)
2651 */
2652 if (error != ENOTTY)
2653 printf("RAIDframe: can't get label for dev "
2654 "%s (%d)\n", dv->dv_xname, error);
2655 }
2656
2657 /* don't need this any more. We'll allocate it again
2658 a little later if we really do... */
2659 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2660 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2661 vput(vp);
2662
2663 if (error)
2664 continue;
2665
2666 for (i=0; i < label.d_npartitions; i++) {
2667 /* We only support partitions marked as RAID */
2668 if (label.d_partitions[i].p_fstype != FS_RAID)
2669 continue;
2670
2671 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2672 if (bdevvp(dev, &vp))
2673 panic("RAID can't alloc vnode");
2674
2675 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2676 if (error) {
2677 /* Whatever... */
2678 vput(vp);
2679 continue;
2680 }
2681
2682 good_one = 0;
2683
2684 clabel = (RF_ComponentLabel_t *)
2685 malloc(sizeof(RF_ComponentLabel_t),
2686 M_RAIDFRAME, M_NOWAIT);
2687 if (clabel == NULL) {
2688 while(ac_list) {
2689 ac = ac_list;
2690 if (ac->clabel)
2691 free(ac->clabel, M_RAIDFRAME);
2692 ac_list = ac_list->next;
2693 free(ac, M_RAIDFRAME);
2694 };
2695 printf("RAID auto config: out of memory!\n");
2696 return(NULL); /* XXX probably should panic? */
2697 }
2698
2699 if (!raidread_component_label(dev, vp, clabel)) {
2700 /* Got the label. Does it look reasonable? */
2701 if (rf_reasonable_label(clabel) &&
2702 (clabel->partitionSize <=
2703 label.d_partitions[i].p_size)) {
2704 #if DEBUG
2705 printf("Component on: %s%c: %d\n",
2706 dv->dv_xname, 'a'+i,
2707 label.d_partitions[i].p_size);
2708 rf_print_component_label(clabel);
2709 #endif
2710 /* if it's reasonable, add it,
2711 else ignore it. */
2712 ac = (RF_AutoConfig_t *)
2713 malloc(sizeof(RF_AutoConfig_t),
2714 M_RAIDFRAME,
2715 M_NOWAIT);
2716 if (ac == NULL) {
2717 /* XXX should panic?? */
2718 while(ac_list) {
2719 ac = ac_list;
2720 if (ac->clabel)
2721 free(ac->clabel,
2722 M_RAIDFRAME);
2723 ac_list = ac_list->next;
2724 free(ac, M_RAIDFRAME);
2725 }
2726 free(clabel, M_RAIDFRAME);
2727 return(NULL);
2728 }
2729
2730 snprintf(ac->devname,
2731 sizeof(ac->devname), "%s%c",
2732 dv->dv_xname, 'a'+i);
2733 ac->dev = dev;
2734 ac->vp = vp;
2735 ac->clabel = clabel;
2736 ac->next = ac_list;
2737 ac_list = ac;
2738 good_one = 1;
2739 }
2740 }
2741 if (!good_one) {
2742 /* cleanup */
2743 free(clabel, M_RAIDFRAME);
2744 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2745 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2746 vput(vp);
2747 }
2748 }
2749 }
2750 return(ac_list);
2751 }
2752
2753 static int
2754 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2755 {
2756
2757 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2758 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2759 ((clabel->clean == RF_RAID_CLEAN) ||
2760 (clabel->clean == RF_RAID_DIRTY)) &&
2761 clabel->row >=0 &&
2762 clabel->column >= 0 &&
2763 clabel->num_rows > 0 &&
2764 clabel->num_columns > 0 &&
2765 clabel->row < clabel->num_rows &&
2766 clabel->column < clabel->num_columns &&
2767 clabel->blockSize > 0 &&
2768 clabel->numBlocks > 0) {
2769 /* label looks reasonable enough... */
2770 return(1);
2771 }
2772 return(0);
2773 }
2774
2775
#if DEBUG
/*
 * Debugging aid: print the fields of a component label to the console.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean;
	const char *is_auto;
	const char *is_root;

	is_clean = clabel->clean ? "Yes" : "No";
	is_auto = clabel->autoconfigure ? "Yes" : "No";
	is_root = clabel->root_partition ? "Yes" : "No";

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n", is_clean, clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);
	printf(" Autoconfig: %s\n", is_auto);
	printf(" Contains root partition: %s\n", is_root);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif
}
#endif
2803
2804 RF_ConfigSet_t *
2805 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2806 {
2807 RF_AutoConfig_t *ac;
2808 RF_ConfigSet_t *config_sets;
2809 RF_ConfigSet_t *cset;
2810 RF_AutoConfig_t *ac_next;
2811
2812
2813 config_sets = NULL;
2814
2815 /* Go through the AutoConfig list, and figure out which components
2816 belong to what sets. */
2817 ac = ac_list;
2818 while(ac!=NULL) {
2819 /* we're going to putz with ac->next, so save it here
2820 for use at the end of the loop */
2821 ac_next = ac->next;
2822
2823 if (config_sets == NULL) {
2824 /* will need at least this one... */
2825 config_sets = (RF_ConfigSet_t *)
2826 malloc(sizeof(RF_ConfigSet_t),
2827 M_RAIDFRAME, M_NOWAIT);
2828 if (config_sets == NULL) {
2829 panic("rf_create_auto_sets: No memory!");
2830 }
2831 /* this one is easy :) */
2832 config_sets->ac = ac;
2833 config_sets->next = NULL;
2834 config_sets->rootable = 0;
2835 ac->next = NULL;
2836 } else {
2837 /* which set does this component fit into? */
2838 cset = config_sets;
2839 while(cset!=NULL) {
2840 if (rf_does_it_fit(cset, ac)) {
2841 /* looks like it matches... */
2842 ac->next = cset->ac;
2843 cset->ac = ac;
2844 break;
2845 }
2846 cset = cset->next;
2847 }
2848 if (cset==NULL) {
2849 /* didn't find a match above... new set..*/
2850 cset = (RF_ConfigSet_t *)
2851 malloc(sizeof(RF_ConfigSet_t),
2852 M_RAIDFRAME, M_NOWAIT);
2853 if (cset == NULL) {
2854 panic("rf_create_auto_sets: No memory!");
2855 }
2856 cset->ac = ac;
2857 ac->next = NULL;
2858 cset->next = config_sets;
2859 cset->rootable = 0;
2860 config_sets = cset;
2861 }
2862 }
2863 ac = ac_next;
2864 }
2865
2866
2867 return(config_sets);
2868 }
2869
2870 static int
2871 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2872 {
2873 RF_ComponentLabel_t *clabel1, *clabel2;
2874
2875 /* If this one matches the *first* one in the set, that's good
2876 enough, since the other members of the set would have been
2877 through here too... */
2878 /* note that we are not checking partitionSize here..
2879
2880 Note that we are also not checking the mod_counters here.
2881 If everything else matches execpt the mod_counter, that's
2882 good enough for this test. We will deal with the mod_counters
2883 a little later in the autoconfiguration process.
2884
2885 (clabel1->mod_counter == clabel2->mod_counter) &&
2886
2887 The reason we don't check for this is that failed disks
2888 will have lower modification counts. If those disks are
2889 not added to the set they used to belong to, then they will
2890 form their own set, which may result in 2 different sets,
2891 for example, competing to be configured at raid0, and
2892 perhaps competing to be the root filesystem set. If the
2893 wrong ones get configured, or both attempt to become /,
2894 weird behaviour and or serious lossage will occur. Thus we
2895 need to bring them into the fold here, and kick them out at
2896 a later point.
2897
2898 */
2899
2900 clabel1 = cset->ac->clabel;
2901 clabel2 = ac->clabel;
2902 if ((clabel1->version == clabel2->version) &&
2903 (clabel1->serial_number == clabel2->serial_number) &&
2904 (clabel1->num_rows == clabel2->num_rows) &&
2905 (clabel1->num_columns == clabel2->num_columns) &&
2906 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2907 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2908 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2909 (clabel1->parityConfig == clabel2->parityConfig) &&
2910 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2911 (clabel1->blockSize == clabel2->blockSize) &&
2912 (clabel1->numBlocks == clabel2->numBlocks) &&
2913 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2914 (clabel1->root_partition == clabel2->root_partition) &&
2915 (clabel1->last_unit == clabel2->last_unit) &&
2916 (clabel1->config_order == clabel2->config_order)) {
2917 /* if it get's here, it almost *has* to be a match */
2918 } else {
2919 /* it's not consistent with somebody in the set..
2920 punt */
2921 return(0);
2922 }
2923 /* all was fine.. it must fit... */
2924 return(1);
2925 }
2926
2927 int
2928 rf_have_enough_components(RF_ConfigSet_t *cset)
2929 {
2930 RF_AutoConfig_t *ac;
2931 RF_AutoConfig_t *auto_config;
2932 RF_ComponentLabel_t *clabel;
2933 int c;
2934 int num_cols;
2935 int num_missing;
2936 int mod_counter;
2937 int mod_counter_found;
2938 int even_pair_failed;
2939 char parity_type;
2940
2941
2942 /* check to see that we have enough 'live' components
2943 of this set. If so, we can configure it if necessary */
2944
2945 num_cols = cset->ac->clabel->num_columns;
2946 parity_type = cset->ac->clabel->parityConfig;
2947
2948 /* XXX Check for duplicate components!?!?!? */
2949
2950 /* Determine what the mod_counter is supposed to be for this set. */
2951
2952 mod_counter_found = 0;
2953 mod_counter = 0;
2954 ac = cset->ac;
2955 while(ac!=NULL) {
2956 if (mod_counter_found==0) {
2957 mod_counter = ac->clabel->mod_counter;
2958 mod_counter_found = 1;
2959 } else {
2960 if (ac->clabel->mod_counter > mod_counter) {
2961 mod_counter = ac->clabel->mod_counter;
2962 }
2963 }
2964 ac = ac->next;
2965 }
2966
2967 num_missing = 0;
2968 auto_config = cset->ac;
2969
2970 even_pair_failed = 0;
2971 for(c=0; c<num_cols; c++) {
2972 ac = auto_config;
2973 while(ac!=NULL) {
2974 if ((ac->clabel->column == c) &&
2975 (ac->clabel->mod_counter == mod_counter)) {
2976 /* it's this one... */
2977 #if DEBUG
2978 printf("Found: %s at %d\n",
2979 ac->devname,c);
2980 #endif
2981 break;
2982 }
2983 ac=ac->next;
2984 }
2985 if (ac==NULL) {
2986 /* Didn't find one here! */
2987 /* special case for RAID 1, especially
2988 where there are more than 2
2989 components (where RAIDframe treats
2990 things a little differently :( ) */
2991 if (parity_type == '1') {
2992 if (c%2 == 0) { /* even component */
2993 even_pair_failed = 1;
2994 } else { /* odd component. If
2995 we're failed, and
2996 so is the even
2997 component, it's
2998 "Good Night, Charlie" */
2999 if (even_pair_failed == 1) {
3000 return(0);
3001 }
3002 }
3003 } else {
3004 /* normal accounting */
3005 num_missing++;
3006 }
3007 }
3008 if ((parity_type == '1') && (c%2 == 1)) {
3009 /* Just did an even component, and we didn't
3010 bail.. reset the even_pair_failed flag,
3011 and go on to the next component.... */
3012 even_pair_failed = 0;
3013 }
3014 }
3015
3016 clabel = cset->ac->clabel;
3017
3018 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3019 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3020 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3021 /* XXX this needs to be made *much* more general */
3022 /* Too many failures */
3023 return(0);
3024 }
3025 /* otherwise, all is well, and we've got enough to take a kick
3026 at autoconfiguring this set */
3027 return(1);
3028 }
3029
3030 void
3031 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3032 RF_Raid_t *raidPtr)
3033 {
3034 RF_ComponentLabel_t *clabel;
3035 int i;
3036
3037 clabel = ac->clabel;
3038
3039 /* 1. Fill in the common stuff */
3040 config->numRow = clabel->num_rows = 1;
3041 config->numCol = clabel->num_columns;
3042 config->numSpare = 0; /* XXX should this be set here? */
3043 config->sectPerSU = clabel->sectPerSU;
3044 config->SUsPerPU = clabel->SUsPerPU;
3045 config->SUsPerRU = clabel->SUsPerRU;
3046 config->parityConfig = clabel->parityConfig;
3047 /* XXX... */
3048 strcpy(config->diskQueueType,"fifo");
3049 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3050 config->layoutSpecificSize = 0; /* XXX ?? */
3051
3052 while(ac!=NULL) {
3053 /* row/col values will be in range due to the checks
3054 in reasonable_label() */
3055 strcpy(config->devnames[0][ac->clabel->column],
3056 ac->devname);
3057 ac = ac->next;
3058 }
3059
3060 for(i=0;i<RF_MAXDBGV;i++) {
3061 config->debugVars[i][0] = 0;
3062 }
3063 }
3064
3065 int
3066 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3067 {
3068 RF_ComponentLabel_t clabel;
3069 struct vnode *vp;
3070 dev_t dev;
3071 int column;
3072 int sparecol;
3073
3074 raidPtr->autoconfigure = new_value;
3075
3076 for(column=0; column<raidPtr->numCol; column++) {
3077 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3078 dev = raidPtr->Disks[column].dev;
3079 vp = raidPtr->raid_cinfo[column].ci_vp;
3080 raidread_component_label(dev, vp, &clabel);
3081 clabel.autoconfigure = new_value;
3082 raidwrite_component_label(dev, vp, &clabel);
3083 }
3084 }
3085 for(column = 0; column < raidPtr->numSpare ; column++) {
3086 sparecol = raidPtr->numCol + column;
3087 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3088 dev = raidPtr->Disks[sparecol].dev;
3089 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3090 raidread_component_label(dev, vp, &clabel);
3091 clabel.autoconfigure = new_value;
3092 raidwrite_component_label(dev, vp, &clabel);
3093 }
3094 }
3095 return(new_value);
3096 }
3097
3098 int
3099 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3100 {
3101 RF_ComponentLabel_t clabel;
3102 struct vnode *vp;
3103 dev_t dev;
3104 int column;
3105 int sparecol;
3106
3107 raidPtr->root_partition = new_value;
3108 for(column=0; column<raidPtr->numCol; column++) {
3109 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3110 dev = raidPtr->Disks[column].dev;
3111 vp = raidPtr->raid_cinfo[column].ci_vp;
3112 raidread_component_label(dev, vp, &clabel);
3113 clabel.root_partition = new_value;
3114 raidwrite_component_label(dev, vp, &clabel);
3115 }
3116 }
3117 for(column = 0; column < raidPtr->numSpare ; column++) {
3118 sparecol = raidPtr->numCol + column;
3119 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3120 dev = raidPtr->Disks[sparecol].dev;
3121 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3122 raidread_component_label(dev, vp, &clabel);
3123 clabel.root_partition = new_value;
3124 raidwrite_component_label(dev, vp, &clabel);
3125 }
3126 }
3127 return(new_value);
3128 }
3129
3130 void
3131 rf_release_all_vps(RF_ConfigSet_t *cset)
3132 {
3133 RF_AutoConfig_t *ac;
3134
3135 ac = cset->ac;
3136 while(ac!=NULL) {
3137 /* Close the vp, and give it back */
3138 if (ac->vp) {
3139 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3140 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3141 vput(ac->vp);
3142 ac->vp = NULL;
3143 }
3144 ac = ac->next;
3145 }
3146 }
3147
3148
3149 void
3150 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3151 {
3152 RF_AutoConfig_t *ac;
3153 RF_AutoConfig_t *next_ac;
3154
3155 ac = cset->ac;
3156 while(ac!=NULL) {
3157 next_ac = ac->next;
3158 /* nuke the label */
3159 free(ac->clabel, M_RAIDFRAME);
3160 /* cleanup the config structure */
3161 free(ac, M_RAIDFRAME);
3162 /* "next.." */
3163 ac = next_ac;
3164 }
3165 /* and, finally, nuke the config set */
3166 free(cset, M_RAIDFRAME);
3167 }
3168
3169
3170 void
3171 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3172 {
3173 /* current version number */
3174 clabel->version = RF_COMPONENT_LABEL_VERSION;
3175 clabel->serial_number = raidPtr->serial_number;
3176 clabel->mod_counter = raidPtr->mod_counter;
3177 clabel->num_rows = 1;
3178 clabel->num_columns = raidPtr->numCol;
3179 clabel->clean = RF_RAID_DIRTY; /* not clean */
3180 clabel->status = rf_ds_optimal; /* "It's good!" */
3181
3182 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3183 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3184 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3185
3186 clabel->blockSize = raidPtr->bytesPerSector;
3187 clabel->numBlocks = raidPtr->sectorsPerDisk;
3188
3189 /* XXX not portable */
3190 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3191 clabel->maxOutstanding = raidPtr->maxOutstanding;
3192 clabel->autoconfigure = raidPtr->autoconfigure;
3193 clabel->root_partition = raidPtr->root_partition;
3194 clabel->last_unit = raidPtr->raidid;
3195 clabel->config_order = raidPtr->config_order;
3196 }
3197
3198 int
3199 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3200 {
3201 RF_Raid_t *raidPtr;
3202 RF_Config_t *config;
3203 int raidID;
3204 int retcode;
3205
3206 #if DEBUG
3207 printf("RAID autoconfigure\n");
3208 #endif
3209
3210 retcode = 0;
3211 *unit = -1;
3212
3213 /* 1. Create a config structure */
3214
3215 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3216 M_RAIDFRAME,
3217 M_NOWAIT);
3218 if (config==NULL) {
3219 printf("Out of mem!?!?\n");
3220 /* XXX do something more intelligent here. */
3221 return(1);
3222 }
3223
3224 memset(config, 0, sizeof(RF_Config_t));
3225
3226 /*
3227 2. Figure out what RAID ID this one is supposed to live at
3228 See if we can get the same RAID dev that it was configured
3229 on last time..
3230 */
3231
3232 raidID = cset->ac->clabel->last_unit;
3233 if ((raidID < 0) || (raidID >= numraid)) {
3234 /* let's not wander off into lala land. */
3235 raidID = numraid - 1;
3236 }
3237 if (raidPtrs[raidID]->valid != 0) {
3238
3239 /*
3240 Nope... Go looking for an alternative...
3241 Start high so we don't immediately use raid0 if that's
3242 not taken.
3243 */
3244
3245 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3246 if (raidPtrs[raidID]->valid == 0) {
3247 /* can use this one! */
3248 break;
3249 }
3250 }
3251 }
3252
3253 if (raidID < 0) {
3254 /* punt... */
3255 printf("Unable to auto configure this set!\n");
3256 printf("(Out of RAID devs!)\n");
3257 free(config, M_RAIDFRAME);
3258 return(1);
3259 }
3260
3261 #if DEBUG
3262 printf("Configuring raid%d:\n",raidID);
3263 #endif
3264
3265 raidPtr = raidPtrs[raidID];
3266
3267 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3268 raidPtr->raidid = raidID;
3269 raidPtr->openings = RAIDOUTSTANDING;
3270
3271 /* 3. Build the configuration structure */
3272 rf_create_configuration(cset->ac, config, raidPtr);
3273
3274 /* 4. Do the configuration */
3275 retcode = rf_Configure(raidPtr, config, cset->ac);
3276
3277 if (retcode == 0) {
3278
3279 raidinit(raidPtrs[raidID]);
3280
3281 rf_markalldirty(raidPtrs[raidID]);
3282 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3283 if (cset->ac->clabel->root_partition==1) {
3284 /* everything configured just fine. Make a note
3285 that this set is eligible to be root. */
3286 cset->rootable = 1;
3287 /* XXX do this here? */
3288 raidPtrs[raidID]->root_partition = 1;
3289 }
3290 }
3291
3292 /* 5. Cleanup */
3293 free(config, M_RAIDFRAME);
3294
3295 *unit = raidID;
3296 return(retcode);
3297 }
3298
3299 void
3300 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3301 {
3302 struct buf *bp;
3303
3304 bp = (struct buf *)desc->bp;
3305 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3306 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3307 }
3308
/*
 * Set up pool `p' for items of `size' bytes under the wait-channel
 * name `w_chan': initialise it, set its high-water mark to `xmax',
 * prime it with `xmin' items and set its low-water mark to `xmin'.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{

	pool_init(p, size, 0, 0, 0, w_chan, NULL);

	/* upper bound first, then pre-allocate and pin the lower bound */
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3318
3319 /*
3320 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3321 * if there is IO pending and if that IO could possibly be done for a
3322 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3323 * otherwise.
3324 *
3325 */
3326
3327 int
3328 rf_buf_queue_check(int raidid)
3329 {
3330 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3331 raidPtrs[raidid]->openings > 0) {
3332 /* there is work to do */
3333 return 0;
3334 }
3335 /* default is nothing to do */
3336 return 1;
3337 }
3338