     1 /*	$NetBSD: rf_netbsdkintf.c,v 1.49 2000/02/16 01:10:44 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
155 int rf_kdebug_level = 0;
156
157 #ifdef DEBUG
   158 #define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
159 #else /* DEBUG */
   160 #define db1_printf(a) do { /* nothing */ } while (0)
161 #endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static int raidinit __P((dev_t, RF_Raid_t *, int));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
195 struct raidbuf {
196 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
197 struct buf *rf_obp; /* ptr. to original I/O buf */
198 int rf_flags; /* misc. flags */
199 RF_DiskQueueData_t *req;/* the request that this was part of.. */
200 };
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that... */
208
209 struct raid_softc {
210 int sc_flags; /* flags */
211 int sc_cflags; /* configuration flags */
212 size_t sc_size; /* size of the raid device */
213 dev_t sc_dev; /* our device.. */
214 char sc_xname[20]; /* XXX external name */
215 struct disk sc_dkdev; /* generic disk device info */
216 struct pool sc_cbufpool; /* component buffer pool */
217 struct buf_queue buf_queue; /* used for the device queue */
218 };
219 /* sc_flags */
220 #define RAIDF_INITED 0x01 /* unit has been initialized */
221 #define RAIDF_WLABEL 0x02 /* label area is writable */
222 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
223 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
224 #define RAIDF_LOCKED 0x80 /* unit is locked */
225
226 #define raidunit(x) DISKUNIT(x)
227 int numraid = 0;
228
229 /*
230 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
231 * Be aware that large numbers can allow the driver to consume a lot of
232 * kernel memory, especially on writes, and in degraded mode reads.
233 *
234 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
235 * a single 64K write will typically require 64K for the old data,
236 * 64K for the old parity, and 64K for the new parity, for a total
237 * of 192K (if the parity buffer is not re-used immediately).
   238  * Even if it is used immediately, that's still 128K, which when multiplied
239 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
240 *
241 * Now in degraded mode, for example, a 64K read on the above setup may
242 * require data reconstruction, which will require *all* of the 4 remaining
243 * disks to participate -- 4 * 32K/disk == 128K again.
244 */
245
246 #ifndef RAIDOUTSTANDING
247 #define RAIDOUTSTANDING 6
248 #endif
249
250 #define RAIDLABELDEV(dev) \
251 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
252
253 /* declared here, and made public, for the benefit of KVM stuff.. */
254 struct raid_softc *raid_softc;
255
256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
257 struct disklabel *));
258 static void raidgetdisklabel __P((dev_t));
259 static void raidmakedisklabel __P((struct raid_softc *));
260
261 static int raidlock __P((struct raid_softc *));
262 static void raidunlock __P((struct raid_softc *));
263
264 static void rf_markalldirty __P((RF_Raid_t *));
265 void rf_mountroot_hook __P((struct device *));
266
267 struct device *raidrootdev;
268 struct cfdata cf_raidrootdev;
269 struct cfdriver cfdrv;
270 /* XXX these should be moved up */
271 #include "rf_configure.h"
272 #include <sys/reboot.h>
273
274 void rf_ReconThread __P((struct rf_recon_req *));
275 /* XXX what I want is: */
276 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
277 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
278 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
279 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
280 void rf_buildroothack __P((void *));
281
282 RF_AutoConfig_t *rf_find_raid_components __P((void));
283 void print_component_label __P((RF_ComponentLabel_t *));
284 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
285 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
286 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
287 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
288 RF_Raid_t *));
289 int rf_set_autoconfig __P((RF_Raid_t *, int));
290 int rf_set_rootpartition __P((RF_Raid_t *, int));
291 void rf_release_all_vps __P((RF_ConfigSet_t *));
292 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
293
294 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
295 allow autoconfig to take place */
296 /* XXX ugly hack. */
297 const char *raid_rooty = "raid0";
298 extern struct device *booted_device;
299
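/*
 * raidattach: attach routine, called at boot with the number of raid
 * units requested in the kernel config.  It allocates the per-unit
 * raidPtrs and raid_softc arrays, boots the RAIDframe core, and, if
 * raidautoconfig is set, searches for components and schedules the
 * autoconfiguration thread (rf_buildroothack).
 */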
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 /* Make some space for requested number of units... */
322
323 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
324 if (raidPtrs == NULL) {
325 panic("raidPtrs is NULL!!\n");
326 }
327
328 rc = rf_mutex_init(&rf_sparet_wait_mutex);
329 if (rc) {
330 RF_PANIC();
331 }
332
333 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
334
   335 	for (i = 0; i < num; i++)
336 raidPtrs[i] = NULL;
337 rc = rf_BootRaidframe();
338 if (rc == 0)
339 printf("Kernelized RAIDframe activated\n");
340 else
341 panic("Serious error booting RAID!!\n");
342
   343 	/* put together some data structures like the CCD device does.  This
344 * lets us lock the device and what-not when it gets opened. */
345
346 raid_softc = (struct raid_softc *)
347 malloc(num * sizeof(struct raid_softc),
348 M_RAIDFRAME, M_NOWAIT);
349 if (raid_softc == NULL) {
350 printf("WARNING: no memory for RAIDframe driver\n");
351 return;
352 }
353 numraid = num;
354 bzero(raid_softc, num * sizeof(struct raid_softc));
355
356 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
357 M_RAIDFRAME, M_NOWAIT);
358 if (raidrootdev == NULL) {
359 panic("No memory for RAIDframe driver!!?!?!\n");
360 }
361
362 for (raidID = 0; raidID < num; raidID++) {
363 BUFQ_INIT(&raid_softc[raidID].buf_queue);
364
365 raidrootdev[raidID].dv_class = DV_DISK;
366 raidrootdev[raidID].dv_cfdata = NULL;
367 raidrootdev[raidID].dv_unit = raidID;
368 raidrootdev[raidID].dv_parent = NULL;
369 raidrootdev[raidID].dv_flags = 0;
370 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
371
372 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
373 (RF_Raid_t *));
374 if (raidPtrs[raidID] == NULL) {
375 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
376 numraid = raidID;
377 return;
378 }
379 }
380
381 if (raidautoconfig) {
382 /* 1. locate all RAID components on the system */
383
384 #if DEBUG
385 printf("Searching for raid components...\n");
386 #endif
387 ac_list = rf_find_raid_components();
388
389 /* 2. sort them into their respective sets */
390
391 config_sets = rf_create_auto_sets(ac_list);
392
393 /* 3. evaluate each set and configure the valid ones
394 This gets done in rf_buildroothack() */
395
396 /* schedule the creation of the thread to do the
397 "/ on RAID" stuff */
398
399 kthread_create(rf_buildroothack,config_sets);
400
401 /* 4. make sure we get our mud.. I mean root.. hooks in.. */
402 /* XXXX pick raid0 for now... and this should be only done
403 if we find something that's bootable!!! */
404 #if 0
405 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
406 #endif
407 if (boothowto & RB_ASKNAME) {
408 /* We don't auto-config... */
409 } else {
410 /* They didn't ask, and we found something bootable... */
411 /* XXX pretend for now.. */
412 #if 0
413 booted_device = &raidrootdev[0];
414 #endif
415 }
416 }
417
418 }
419
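/*
 * rf_buildroothack: body of the kernel thread created by raidattach.
 * Walks the list of auto-detected configuration sets: sets marked for
 * autoconfiguration are configured and initialized here; the others just
 * have their component vnodes released.  Each set is cleaned up when done.
 */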
420 void
421 rf_buildroothack(arg)
422 void *arg;
423 {
424 RF_ConfigSet_t *config_sets = arg;
425 RF_ConfigSet_t *cset;
426 RF_ConfigSet_t *next_cset;
427 RF_Raid_t *raidPtr;
428 RF_Config_t *config;
429 int raidID;
430 int retcode;
431
432 raidID=0;
433 cset = config_sets;
434 while(cset != NULL ) {
435 next_cset = cset->next;
436 if (cset->ac->clabel->autoconfigure==1) {
437 printf("Starting autoconfigure on raid%d\n",raidID);
438 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
439 M_RAIDFRAME,
440 M_NOWAIT);
441 if (config==NULL) {
442 printf("Out of mem!?!?\n");
443 /* XXX do something more intelligent here. */
444 return;
445 }
446 /* XXX raidID needs to be set correctly.. */
447 raidPtr = raidPtrs[raidID];
448 /* XXX all this stuff should be done SOMEWHERE ELSE! */
449 raidPtr->raidid = raidID;
450 raidPtr->openings = RAIDOUTSTANDING;
451 rf_create_configuration(cset->ac, config, raidPtr);
452 retcode = rf_Configure( raidPtr, config, cset->ac );
453
454 if (retcode == 0) {
455 #if DEBUG
456 printf("Calling raidinit()\n");
457 #endif
458 /* XXX the 0's below are bogus! */
459 retcode = raidinit(0, raidPtrs[raidID], 0);
460 if (retcode) {
461 printf("init returned: %d\n",retcode);
462 }
463 rf_markalldirty( raidPtrs[raidID] );
464 }
465 raidID++; /* XXX for now.. */
466 free(config, M_RAIDFRAME);
467 } else {
468 /* we're not autoconfiguring this set...
469 release the associated resources */
470 #if DEBUG
471 printf("Releasing vp's\n");
472 #endif
473 rf_release_all_vps(cset);
474 #if DEBUG
475 printf("Done.\n");
476 #endif
477 }
478 /* cleanup */
479 #if DEBUG
480 printf("Cleaning up config set\n");
481 #endif
482 rf_cleanup_config_set(cset);
483 #if DEBUG
484 printf("Done cleanup\n");
485 #endif
486 cset = next_cset;
487 }
488 }
489
490
491 int
492 raidsize(dev)
493 dev_t dev;
494 {
495 struct raid_softc *rs;
496 struct disklabel *lp;
497 int part, unit, omask, size;
498
499 unit = raidunit(dev);
500 if (unit >= numraid)
501 return (-1);
502 rs = &raid_softc[unit];
503
504 if ((rs->sc_flags & RAIDF_INITED) == 0)
505 return (-1);
506
507 part = DISKPART(dev);
508 omask = rs->sc_dkdev.dk_openmask & (1 << part);
509 lp = rs->sc_dkdev.dk_label;
510
511 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
512 return (-1);
513
514 if (lp->d_partitions[part].p_fstype != FS_SWAP)
515 size = -1;
516 else
517 size = lp->d_partitions[part].p_size *
518 (lp->d_secsize / DEV_BSIZE);
519
520 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
521 return (-1);
522
523 return (size);
524
525 }
526
527 int
528 raiddump(dev, blkno, va, size)
529 dev_t dev;
530 daddr_t blkno;
531 caddr_t va;
532 size_t size;
533 {
534 /* Not implemented. */
535 return ENXIO;
536 }
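/*
 * raidopen: on the first open of an initialized unit the disklabel is
 * (re)read and the component labels are marked dirty, so that an unclean
 * shutdown can be detected later.  Opens are tracked per-partition in
 * dk_openmask so the unit cannot be unconfigured while in use.
 */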
537 /* ARGSUSED */
538 int
539 raidopen(dev, flags, fmt, p)
540 dev_t dev;
541 int flags, fmt;
542 struct proc *p;
543 {
544 int unit = raidunit(dev);
545 struct raid_softc *rs;
546 struct disklabel *lp;
547 int part, pmask;
548 int error = 0;
549
550 if (unit >= numraid)
551 return (ENXIO);
552 rs = &raid_softc[unit];
553
554 if ((error = raidlock(rs)) != 0)
555 return (error);
556 lp = rs->sc_dkdev.dk_label;
557
558 part = DISKPART(dev);
559 pmask = (1 << part);
560
561 db1_printf(("Opening raid device number: %d partition: %d\n",
562 unit, part));
563
564
565 if ((rs->sc_flags & RAIDF_INITED) &&
566 (rs->sc_dkdev.dk_openmask == 0))
567 raidgetdisklabel(dev);
568
569 /* make sure that this partition exists */
570
571 if (part != RAW_PART) {
572 db1_printf(("Not a raw partition..\n"));
573 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
574 ((part >= lp->d_npartitions) ||
575 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
576 error = ENXIO;
577 raidunlock(rs);
578 db1_printf(("Bailing out...\n"));
579 return (error);
580 }
581 }
582 /* Prevent this unit from being unconfigured while open. */
583 switch (fmt) {
584 case S_IFCHR:
585 rs->sc_dkdev.dk_copenmask |= pmask;
586 break;
587
588 case S_IFBLK:
589 rs->sc_dkdev.dk_bopenmask |= pmask;
590 break;
591 }
592
593 if ((rs->sc_dkdev.dk_openmask == 0) &&
594 ((rs->sc_flags & RAIDF_INITED) != 0)) {
595 /* First one... mark things as dirty... Note that we *MUST*
596 have done a configure before this. I DO NOT WANT TO BE
597 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
598 THAT THEY BELONG TOGETHER!!!!! */
599 /* XXX should check to see if we're only open for reading
600 here... If so, we needn't do this, but then need some
601 other way of keeping track of what's happened.. */
602
603 rf_markalldirty( raidPtrs[unit] );
604 }
605
606
607 rs->sc_dkdev.dk_openmask =
608 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
609
610 raidunlock(rs);
611
612 return (error);
613
614
615 }
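/*
 * raidclose: clears this partition's bit in the open masks; on the last
 * close of an initialized unit the component labels are updated to mark
 * the set clean.
 */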
616 /* ARGSUSED */
617 int
618 raidclose(dev, flags, fmt, p)
619 dev_t dev;
620 int flags, fmt;
621 struct proc *p;
622 {
623 int unit = raidunit(dev);
624 struct raid_softc *rs;
625 int error = 0;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((error = raidlock(rs)) != 0)
633 return (error);
634
635 part = DISKPART(dev);
636
637 /* ...that much closer to allowing unconfiguration... */
638 switch (fmt) {
639 case S_IFCHR:
640 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
641 break;
642
643 case S_IFBLK:
644 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
645 break;
646 }
647 rs->sc_dkdev.dk_openmask =
648 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
649
650 if ((rs->sc_dkdev.dk_openmask == 0) &&
651 ((rs->sc_flags & RAIDF_INITED) != 0)) {
   652 		/* Last one... the device is not unconfigured yet.
   653 		   Device shutdown has taken care of setting the
   654 		   clean bits if RAIDF_INITED is not set;
   655 		   mark things as clean... */
656 rf_update_component_labels( raidPtrs[unit] );
657 }
658
659 raidunlock(rs);
660 return (0);
661
662 }
663
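/*
 * raidstrategy: validate the request against the unit state and the
 * disklabel, then queue the buf and call raidstart() to feed it into
 * RAIDframe.  Errors are reported by setting B_ERROR and calling
 * biodone(); the queue manipulation itself is done at splbio().
 */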
664 void
665 raidstrategy(bp)
666 register struct buf *bp;
667 {
668 register int s;
669
670 unsigned int raidID = raidunit(bp->b_dev);
671 RF_Raid_t *raidPtr;
672 struct raid_softc *rs = &raid_softc[raidID];
673 struct disklabel *lp;
674 int wlabel;
675
   676 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
   677 		bp->b_error = ENXIO;
   678 		bp->b_flags |= B_ERROR;
679 bp->b_resid = bp->b_bcount;
680 biodone(bp);
681 return;
682 }
683 if (raidID >= numraid || !raidPtrs[raidID]) {
684 bp->b_error = ENODEV;
685 bp->b_flags |= B_ERROR;
686 bp->b_resid = bp->b_bcount;
687 biodone(bp);
688 return;
689 }
690 raidPtr = raidPtrs[raidID];
691 if (!raidPtr->valid) {
692 bp->b_error = ENODEV;
693 bp->b_flags |= B_ERROR;
694 bp->b_resid = bp->b_bcount;
695 biodone(bp);
696 return;
697 }
698 if (bp->b_bcount == 0) {
699 db1_printf(("b_bcount is zero..\n"));
700 biodone(bp);
701 return;
702 }
703 lp = rs->sc_dkdev.dk_label;
704
705 /*
706 * Do bounds checking and adjust transfer. If there's an
707 * error, the bounds check will flag that for us.
708 */
709
710 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
711 if (DISKPART(bp->b_dev) != RAW_PART)
712 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
713 db1_printf(("Bounds check failed!!:%d %d\n",
714 (int) bp->b_blkno, (int) wlabel));
715 biodone(bp);
716 return;
717 }
718 s = splbio();
719
720 bp->b_resid = 0;
721
722 /* stuff it onto our queue */
723 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
724
725 raidstart(raidPtrs[raidID]);
726
727 splx(s);
728 }
729 /* ARGSUSED */
730 int
731 raidread(dev, uio, flags)
732 dev_t dev;
733 struct uio *uio;
734 int flags;
735 {
736 int unit = raidunit(dev);
737 struct raid_softc *rs;
738 int part;
739
740 if (unit >= numraid)
741 return (ENXIO);
742 rs = &raid_softc[unit];
743
744 if ((rs->sc_flags & RAIDF_INITED) == 0)
745 return (ENXIO);
746 part = DISKPART(dev);
747
748 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
749
750 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
751
752 }
753 /* ARGSUSED */
754 int
755 raidwrite(dev, uio, flags)
756 dev_t dev;
757 struct uio *uio;
758 int flags;
759 {
760 int unit = raidunit(dev);
761 struct raid_softc *rs;
762
763 if (unit >= numraid)
764 return (ENXIO);
765 rs = &raid_softc[unit];
766
767 if ((rs->sc_flags & RAIDF_INITED) == 0)
768 return (ENXIO);
769 db1_printf(("raidwrite\n"));
770 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
771
772 }
773
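/*
 * raidioctl: handles both the RAIDframe-specific commands (configure,
 * shutdown, rebuild, parity rewrite, status queries, etc.) and the
 * standard disk ioctls (DIOCGDINFO, DIOCSDINFO, ...) for label handling.
 * Most RAIDFRAME_* commands require the unit to be configured first.
 */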
774 int
775 raidioctl(dev, cmd, data, flag, p)
776 dev_t dev;
777 u_long cmd;
778 caddr_t data;
779 int flag;
780 struct proc *p;
781 {
782 int unit = raidunit(dev);
783 int error = 0;
784 int part, pmask;
785 struct raid_softc *rs;
786 RF_Config_t *k_cfg, *u_cfg;
787 RF_Raid_t *raidPtr;
788 RF_RaidDisk_t *diskPtr;
789 RF_AccTotals_t *totals;
790 RF_DeviceConfig_t *d_cfg, **ucfgp;
791 u_char *specific_buf;
792 int retcode = 0;
793 int row;
794 int column;
795 struct rf_recon_req *rrcopy, *rr;
796 RF_ComponentLabel_t *clabel;
797 RF_ComponentLabel_t ci_label;
798 RF_ComponentLabel_t **clabel_ptr;
799 RF_SingleComponent_t *sparePtr,*componentPtr;
800 RF_SingleComponent_t hot_spare;
801 RF_SingleComponent_t component;
802 int i, j, d;
803
804 if (unit >= numraid)
805 return (ENXIO);
806 rs = &raid_softc[unit];
807 raidPtr = raidPtrs[unit];
808
809 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
810 (int) DISKPART(dev), (int) unit, (int) cmd));
811
812 /* Must be open for writes for these commands... */
813 switch (cmd) {
814 case DIOCSDINFO:
815 case DIOCWDINFO:
816 case DIOCWLABEL:
817 if ((flag & FWRITE) == 0)
818 return (EBADF);
819 }
820
821 /* Must be initialized for these... */
822 switch (cmd) {
823 case DIOCGDINFO:
824 case DIOCSDINFO:
825 case DIOCWDINFO:
826 case DIOCGPART:
827 case DIOCWLABEL:
828 case DIOCGDEFLABEL:
829 case RAIDFRAME_SHUTDOWN:
830 case RAIDFRAME_REWRITEPARITY:
831 case RAIDFRAME_GET_INFO:
832 case RAIDFRAME_RESET_ACCTOTALS:
833 case RAIDFRAME_GET_ACCTOTALS:
834 case RAIDFRAME_KEEP_ACCTOTALS:
835 case RAIDFRAME_GET_SIZE:
836 case RAIDFRAME_FAIL_DISK:
837 case RAIDFRAME_COPYBACK:
838 case RAIDFRAME_CHECK_RECON_STATUS:
839 case RAIDFRAME_GET_COMPONENT_LABEL:
840 case RAIDFRAME_SET_COMPONENT_LABEL:
841 case RAIDFRAME_ADD_HOT_SPARE:
842 case RAIDFRAME_REMOVE_HOT_SPARE:
843 case RAIDFRAME_INIT_LABELS:
844 case RAIDFRAME_REBUILD_IN_PLACE:
845 case RAIDFRAME_CHECK_PARITY:
846 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
847 case RAIDFRAME_CHECK_COPYBACK_STATUS:
848 case RAIDFRAME_SET_AUTOCONFIG:
849 case RAIDFRAME_SET_ROOT:
850 if ((rs->sc_flags & RAIDF_INITED) == 0)
851 return (ENXIO);
852 }
853
854 switch (cmd) {
855
856 /* configure the system */
857 case RAIDFRAME_CONFIGURE:
858
859 if (raidPtr->valid) {
860 /* There is a valid RAID set running on this unit! */
861 printf("raid%d: Device already configured!\n",unit);
862 }
863
864 /* copy-in the configuration information */
865 /* data points to a pointer to the configuration structure */
866
867 u_cfg = *((RF_Config_t **) data);
868 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
869 if (k_cfg == NULL) {
870 return (ENOMEM);
871 }
872 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
873 sizeof(RF_Config_t));
874 if (retcode) {
875 RF_Free(k_cfg, sizeof(RF_Config_t));
876 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
877 retcode));
878 return (retcode);
879 }
880 /* allocate a buffer for the layout-specific data, and copy it
881 * in */
882 if (k_cfg->layoutSpecificSize) {
883 if (k_cfg->layoutSpecificSize > 10000) {
884 /* sanity check */
885 RF_Free(k_cfg, sizeof(RF_Config_t));
886 return (EINVAL);
887 }
888 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
889 (u_char *));
890 if (specific_buf == NULL) {
891 RF_Free(k_cfg, sizeof(RF_Config_t));
892 return (ENOMEM);
893 }
894 retcode = copyin(k_cfg->layoutSpecific,
895 (caddr_t) specific_buf,
896 k_cfg->layoutSpecificSize);
897 if (retcode) {
898 RF_Free(k_cfg, sizeof(RF_Config_t));
899 RF_Free(specific_buf,
900 k_cfg->layoutSpecificSize);
901 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
902 retcode));
903 return (retcode);
904 }
905 } else
906 specific_buf = NULL;
907 k_cfg->layoutSpecific = specific_buf;
908
909 /* should do some kind of sanity check on the configuration.
910 * Store the sum of all the bytes in the last byte? */
911
912 /* configure the system */
913
914 /*
915 * Clear the entire RAID descriptor, just to make sure
916 * there is no stale data left in the case of a
917 * reconfiguration
918 */
919 bzero((char *) raidPtr, sizeof(RF_Raid_t));
920 raidPtr->raidid = unit;
921
922 retcode = rf_Configure(raidPtr, k_cfg, NULL);
923
924 if (retcode == 0) {
925
926 /* allow this many simultaneous IO's to
927 this RAID device */
928 raidPtr->openings = RAIDOUTSTANDING;
929
930 retcode = raidinit(dev, raidPtr, unit);
931 rf_markalldirty( raidPtr );
932 }
933 /* free the buffers. No return code here. */
934 if (k_cfg->layoutSpecificSize) {
935 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
936 }
937 RF_Free(k_cfg, sizeof(RF_Config_t));
938
939 return (retcode);
940
941 /* shutdown the system */
942 case RAIDFRAME_SHUTDOWN:
943
944 if ((error = raidlock(rs)) != 0)
945 return (error);
946
947 /*
948 * If somebody has a partition mounted, we shouldn't
949 * shutdown.
950 */
951
952 part = DISKPART(dev);
953 pmask = (1 << part);
954 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
955 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
956 (rs->sc_dkdev.dk_copenmask & pmask))) {
957 raidunlock(rs);
958 return (EBUSY);
959 }
960
961 retcode = rf_Shutdown(raidPtr);
962
963 pool_destroy(&rs->sc_cbufpool);
964
965 /* It's no longer initialized... */
966 rs->sc_flags &= ~RAIDF_INITED;
967
968 /* Detach the disk. */
969 disk_detach(&rs->sc_dkdev);
970
971 raidunlock(rs);
972
973 return (retcode);
974 case RAIDFRAME_GET_COMPONENT_LABEL:
975 clabel_ptr = (RF_ComponentLabel_t **) data;
976 /* need to read the component label for the disk indicated
977 by row,column in clabel */
978
   979 		/* For practice, let's get it directly from disk, rather
980 than from the in-core copy */
981 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
982 (RF_ComponentLabel_t *));
983 if (clabel == NULL)
984 return (ENOMEM);
985
986 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
987
988 retcode = copyin( *clabel_ptr, clabel,
989 sizeof(RF_ComponentLabel_t));
990
991 if (retcode) {
992 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
993 return(retcode);
994 }
995
996 row = clabel->row;
997 column = clabel->column;
998
999 if ((row < 0) || (row >= raidPtr->numRow) ||
1000 (column < 0) || (column >= raidPtr->numCol)) {
1001 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1002 return(EINVAL);
1003 }
1004
1005 raidread_component_label(raidPtr->Disks[row][column].dev,
1006 raidPtr->raid_cinfo[row][column].ci_vp,
1007 clabel );
1008
1009 retcode = copyout((caddr_t) clabel,
1010 (caddr_t) *clabel_ptr,
1011 sizeof(RF_ComponentLabel_t));
1012 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1013 return (retcode);
1014
1015 case RAIDFRAME_SET_COMPONENT_LABEL:
1016 clabel = (RF_ComponentLabel_t *) data;
1017
1018 /* XXX check the label for valid stuff... */
1019 /* Note that some things *should not* get modified --
1020 the user should be re-initing the labels instead of
1021 trying to patch things.
1022 */
1023
1024 printf("Got component label:\n");
1025 printf("Version: %d\n",clabel->version);
1026 printf("Serial Number: %d\n",clabel->serial_number);
1027 printf("Mod counter: %d\n",clabel->mod_counter);
1028 printf("Row: %d\n", clabel->row);
1029 printf("Column: %d\n", clabel->column);
1030 printf("Num Rows: %d\n", clabel->num_rows);
1031 printf("Num Columns: %d\n", clabel->num_columns);
1032 printf("Clean: %d\n", clabel->clean);
1033 printf("Status: %d\n", clabel->status);
1034
1035 row = clabel->row;
1036 column = clabel->column;
1037
1038 if ((row < 0) || (row >= raidPtr->numRow) ||
1039 (column < 0) || (column >= raidPtr->numCol)) {
1040 return(EINVAL);
1041 }
1042
1043 /* XXX this isn't allowed to do anything for now :-) */
1044
1045 /* XXX and before it is, we need to fill in the rest
1046 of the fields!?!?!?! */
1047 #if 0
1048 raidwrite_component_label(
1049 raidPtr->Disks[row][column].dev,
1050 raidPtr->raid_cinfo[row][column].ci_vp,
1051 clabel );
1052 #endif
1053 return (0);
1054
1055 case RAIDFRAME_INIT_LABELS:
1056 clabel = (RF_ComponentLabel_t *) data;
1057 /*
1058 we only want the serial number from
1059 the above. We get all the rest of the information
1060 from the config that was used to create this RAID
1061 set.
1062 */
1063
1064 raidPtr->serial_number = clabel->serial_number;
1065
1066 raid_init_component_label(raidPtr, clabel);
1067
1068 for(row=0;row<raidPtr->numRow;row++) {
1069 ci_label.row = row;
1070 for(column=0;column<raidPtr->numCol;column++) {
1071 diskPtr = &raidPtr->Disks[row][column];
1072 ci_label.blockSize = diskPtr->blockSize;
1073 ci_label.numBlocks = diskPtr->numBlocks;
1074 ci_label.partitionSize = diskPtr->partitionSize;
1075 ci_label.column = column;
1076 raidwrite_component_label(
1077 raidPtr->Disks[row][column].dev,
1078 raidPtr->raid_cinfo[row][column].ci_vp,
1079 &ci_label );
1080 }
1081 }
1082
1083 return (retcode);
1084 case RAIDFRAME_SET_AUTOCONFIG:
  1085 		d = rf_set_autoconfig(raidPtr, *(int *) data);
  1086 		printf("New autoconfig value is: %d\n", d);
  1087 		*(int *) data = d;
1088 return (retcode);
1089
1090 case RAIDFRAME_SET_ROOT:
  1091 		d = rf_set_rootpartition(raidPtr, *(int *) data);
  1092 		printf("New rootpartition value is: %d\n", d);
  1093 		*(int *) data = d;
1094 return (retcode);
1095
1096 /* initialize all parity */
1097 case RAIDFRAME_REWRITEPARITY:
1098
1099 if (raidPtr->Layout.map->faultsTolerated == 0) {
1100 /* Parity for RAID 0 is trivially correct */
1101 raidPtr->parity_good = RF_RAID_CLEAN;
1102 return(0);
1103 }
1104
1105 if (raidPtr->parity_rewrite_in_progress == 1) {
1106 /* Re-write is already in progress! */
1107 return(EINVAL);
1108 }
1109
1110 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1111 rf_RewriteParityThread,
1112 raidPtr,"raid_parity");
1113 return (retcode);
1114
1115
1116 case RAIDFRAME_ADD_HOT_SPARE:
1117 sparePtr = (RF_SingleComponent_t *) data;
1118 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1119 printf("Adding spare\n");
1120 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1121 return(retcode);
1122
1123 case RAIDFRAME_REMOVE_HOT_SPARE:
1124 return(retcode);
1125
1126 case RAIDFRAME_REBUILD_IN_PLACE:
1127
1128 if (raidPtr->Layout.map->faultsTolerated == 0) {
1129 /* Can't do this on a RAID 0!! */
1130 return(EINVAL);
1131 }
1132
1133 if (raidPtr->recon_in_progress == 1) {
1134 /* a reconstruct is already in progress! */
1135 return(EINVAL);
1136 }
1137
1138 componentPtr = (RF_SingleComponent_t *) data;
1139 memcpy( &component, componentPtr,
1140 sizeof(RF_SingleComponent_t));
1141 row = component.row;
1142 column = component.column;
1143 printf("Rebuild: %d %d\n",row, column);
1144 if ((row < 0) || (row >= raidPtr->numRow) ||
1145 (column < 0) || (column >= raidPtr->numCol)) {
1146 return(EINVAL);
1147 }
1148
1149 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1150 if (rrcopy == NULL)
1151 return(ENOMEM);
1152
1153 rrcopy->raidPtr = (void *) raidPtr;
1154 rrcopy->row = row;
1155 rrcopy->col = column;
1156
1157 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1158 rf_ReconstructInPlaceThread,
1159 rrcopy,"raid_reconip");
1160 return(retcode);
1161
1162 case RAIDFRAME_GET_INFO:
1163 if (!raidPtr->valid)
1164 return (ENODEV);
1165 ucfgp = (RF_DeviceConfig_t **) data;
1166 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1167 (RF_DeviceConfig_t *));
1168 if (d_cfg == NULL)
1169 return (ENOMEM);
1170 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1171 d_cfg->rows = raidPtr->numRow;
1172 d_cfg->cols = raidPtr->numCol;
1173 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1174 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1175 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1176 return (ENOMEM);
1177 }
1178 d_cfg->nspares = raidPtr->numSpare;
1179 if (d_cfg->nspares >= RF_MAX_DISKS) {
1180 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1181 return (ENOMEM);
1182 }
1183 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1184 d = 0;
1185 for (i = 0; i < d_cfg->rows; i++) {
1186 for (j = 0; j < d_cfg->cols; j++) {
1187 d_cfg->devs[d] = raidPtr->Disks[i][j];
1188 d++;
1189 }
1190 }
1191 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1192 d_cfg->spares[i] = raidPtr->Disks[0][j];
1193 }
1194 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1195 sizeof(RF_DeviceConfig_t));
1196 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1197
1198 return (retcode);
1199
1200 case RAIDFRAME_CHECK_PARITY:
1201 *(int *) data = raidPtr->parity_good;
1202 return (0);
1203
1204 case RAIDFRAME_RESET_ACCTOTALS:
1205 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1206 return (0);
1207
1208 case RAIDFRAME_GET_ACCTOTALS:
1209 totals = (RF_AccTotals_t *) data;
1210 *totals = raidPtr->acc_totals;
1211 return (0);
1212
1213 case RAIDFRAME_KEEP_ACCTOTALS:
1214 raidPtr->keep_acc_totals = *(int *)data;
1215 return (0);
1216
1217 case RAIDFRAME_GET_SIZE:
1218 *(int *) data = raidPtr->totalSectors;
1219 return (0);
1220
1221 /* fail a disk & optionally start reconstruction */
1222 case RAIDFRAME_FAIL_DISK:
1223
1224 if (raidPtr->Layout.map->faultsTolerated == 0) {
1225 /* Can't do this on a RAID 0!! */
1226 return(EINVAL);
1227 }
1228
1229 rr = (struct rf_recon_req *) data;
1230
1231 if (rr->row < 0 || rr->row >= raidPtr->numRow
1232 || rr->col < 0 || rr->col >= raidPtr->numCol)
1233 return (EINVAL);
1234
1235 printf("raid%d: Failing the disk: row: %d col: %d\n",
1236 unit, rr->row, rr->col);
1237
1238 /* make a copy of the recon request so that we don't rely on
1239 * the user's buffer */
1240 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1241 if (rrcopy == NULL)
1242 return(ENOMEM);
1243 bcopy(rr, rrcopy, sizeof(*rr));
1244 rrcopy->raidPtr = (void *) raidPtr;
1245
1246 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1247 rf_ReconThread,
1248 rrcopy,"raid_recon");
1249 return (0);
1250
1251 /* invoke a copyback operation after recon on whatever disk
1252 * needs it, if any */
1253 case RAIDFRAME_COPYBACK:
1254
1255 if (raidPtr->Layout.map->faultsTolerated == 0) {
1256 /* This makes no sense on a RAID 0!! */
1257 return(EINVAL);
1258 }
1259
1260 if (raidPtr->copyback_in_progress == 1) {
1261 /* Copyback is already in progress! */
1262 return(EINVAL);
1263 }
1264
1265 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1266 rf_CopybackThread,
1267 raidPtr,"raid_copyback");
1268 return (retcode);
1269
1270 /* return the percentage completion of reconstruction */
1271 case RAIDFRAME_CHECK_RECON_STATUS:
1272 if (raidPtr->Layout.map->faultsTolerated == 0) {
1273 /* This makes no sense on a RAID 0 */
1274 return(EINVAL);
1275 }
1276 row = 0; /* XXX we only consider a single row... */
1277 if (raidPtr->status[row] != rf_rs_reconstructing)
1278 *(int *) data = 100;
1279 else
1280 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1281 return (0);
1282
1283 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1284 if (raidPtr->Layout.map->faultsTolerated == 0) {
1285 /* This makes no sense on a RAID 0 */
1286 return(EINVAL);
1287 }
1288 if (raidPtr->parity_rewrite_in_progress == 1) {
1289 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1290 } else {
1291 *(int *) data = 100;
1292 }
1293 return (0);
1294
1295 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1296 if (raidPtr->Layout.map->faultsTolerated == 0) {
1297 /* This makes no sense on a RAID 0 */
1298 return(EINVAL);
1299 }
1300 if (raidPtr->copyback_in_progress == 1) {
1301 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1302 raidPtr->Layout.numStripe;
1303 } else {
1304 *(int *) data = 100;
1305 }
1306 return (0);
1307
1308
1309 /* the sparetable daemon calls this to wait for the kernel to
1310 * need a spare table. this ioctl does not return until a
1311 * spare table is needed. XXX -- calling mpsleep here in the
1312 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1313 * -- I should either compute the spare table in the kernel,
1314 * or have a different -- XXX XXX -- interface (a different
1315 * character device) for delivering the table -- XXX */
1316 #if 0
1317 case RAIDFRAME_SPARET_WAIT:
1318 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1319 while (!rf_sparet_wait_queue)
1320 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1321 waitreq = rf_sparet_wait_queue;
1322 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1323 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1324
1325 /* structure assignment */
1326 *((RF_SparetWait_t *) data) = *waitreq;
1327
1328 RF_Free(waitreq, sizeof(*waitreq));
1329 return (0);
1330
1331 /* wakes up a process waiting on SPARET_WAIT and puts an error
  1332 	 * code in it that will cause the daemon to exit */
1333 case RAIDFRAME_ABORT_SPARET_WAIT:
1334 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1335 waitreq->fcol = -1;
1336 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1337 waitreq->next = rf_sparet_wait_queue;
1338 rf_sparet_wait_queue = waitreq;
1339 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1340 wakeup(&rf_sparet_wait_queue);
1341 return (0);
1342
1343 /* used by the spare table daemon to deliver a spare table
1344 * into the kernel */
1345 case RAIDFRAME_SEND_SPARET:
1346
1347 /* install the spare table */
1348 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1349
1350 /* respond to the requestor. the return status of the spare
1351 * table installation is passed in the "fcol" field */
1352 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1353 waitreq->fcol = retcode;
1354 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1355 waitreq->next = rf_sparet_resp_queue;
1356 rf_sparet_resp_queue = waitreq;
1357 wakeup(&rf_sparet_resp_queue);
1358 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1359
1360 return (retcode);
1361 #endif
1362
1363 default:
1364 break; /* fall through to the os-specific code below */
1365
1366 }
1367
1368 if (!raidPtr->valid)
1369 return (EINVAL);
1370
1371 /*
1372 * Add support for "regular" device ioctls here.
1373 */
1374
1375 switch (cmd) {
1376 case DIOCGDINFO:
1377 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1378 break;
1379
1380 case DIOCGPART:
1381 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1382 ((struct partinfo *) data)->part =
1383 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1384 break;
1385
1386 case DIOCWDINFO:
1387 case DIOCSDINFO:
1388 if ((error = raidlock(rs)) != 0)
1389 return (error);
1390
1391 rs->sc_flags |= RAIDF_LABELLING;
1392
1393 error = setdisklabel(rs->sc_dkdev.dk_label,
1394 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1395 if (error == 0) {
1396 if (cmd == DIOCWDINFO)
1397 error = writedisklabel(RAIDLABELDEV(dev),
1398 raidstrategy, rs->sc_dkdev.dk_label,
1399 rs->sc_dkdev.dk_cpulabel);
1400 }
1401 rs->sc_flags &= ~RAIDF_LABELLING;
1402
1403 raidunlock(rs);
1404
1405 if (error)
1406 return (error);
1407 break;
1408
1409 case DIOCWLABEL:
1410 if (*(int *) data != 0)
1411 rs->sc_flags |= RAIDF_WLABEL;
1412 else
1413 rs->sc_flags &= ~RAIDF_WLABEL;
1414 break;
1415
1416 case DIOCGDEFLABEL:
1417 raidgetdefaultlabel(raidPtr, rs,
1418 (struct disklabel *) data);
1419 break;
1420
1421 default:
1422 retcode = ENOTTY;
1423 }
1424 return (retcode);
1425
1426 }
1427
1428
1429 /* raidinit -- complete the rest of the initialization for the
1430 RAIDframe device. */
1431
1432
1433 static int
1434 raidinit(dev, raidPtr, unit)
1435 dev_t dev;
1436 RF_Raid_t *raidPtr;
1437 int unit;
1438 {
1439 int retcode;
1440 struct raid_softc *rs;
1441
1442 retcode = 0;
1443
1444 rs = &raid_softc[unit];
1445 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1446 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1447
1448
1449 /* XXX should check return code first... */
1450 rs->sc_flags |= RAIDF_INITED;
1451
1452 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1453
1454 rs->sc_dkdev.dk_name = rs->sc_xname;
1455
1456 /* disk_attach actually creates space for the CPU disklabel, among
1457 * other things, so it's critical to call this *BEFORE* we try putzing
1458 * with disklabels. */
1459
1460 disk_attach(&rs->sc_dkdev);
1461
1462 /* XXX There may be a weird interaction here between this, and
1463 * protectedSectors, as used in RAIDframe. */
1464
1465 rs->sc_size = raidPtr->totalSectors;
1466 rs->sc_dev = dev;
1467
1468 return (retcode);
1469 }
1470
1471 /* wake up the daemon & tell it to get us a spare table
1472 * XXX
1473 * the entries in the queues should be tagged with the raidPtr
1474 * so that in the extremely rare case that two recons happen at once,
  1475  * we know for which device we're requesting a spare table
1476 * XXX
1477 *
1478 * XXX This code is not currently used. GO
1479 */
1480 int
1481 rf_GetSpareTableFromDaemon(req)
1482 RF_SparetWait_t *req;
1483 {
1484 int retcode;
1485
1486 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1487 req->next = rf_sparet_wait_queue;
1488 rf_sparet_wait_queue = req;
1489 wakeup(&rf_sparet_wait_queue);
1490
1491 /* mpsleep unlocks the mutex */
1492 while (!rf_sparet_resp_queue) {
1493 tsleep(&rf_sparet_resp_queue, PRIBIO,
1494 "raidframe getsparetable", 0);
1495 }
1496 req = rf_sparet_resp_queue;
1497 rf_sparet_resp_queue = req->next;
1498 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1499
1500 retcode = req->fcol;
1501 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1502 * alloc'd */
1503 return (retcode);
1504 }
1505
1506 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1507 * bp & passes it down.
1508 * any calls originating in the kernel must use non-blocking I/O
1509 * do some extra sanity checking to return "appropriate" error values for
1510 * certain conditions (to make some standard utilities work)
1511 *
1512 * Formerly known as: rf_DoAccessKernel
1513 */
1514 void
1515 raidstart(raidPtr)
1516 RF_Raid_t *raidPtr;
1517 {
1518 RF_SectorCount_t num_blocks, pb, sum;
1519 RF_RaidAddr_t raid_addr;
1520 int retcode;
1521 struct partition *pp;
1522 daddr_t blocknum;
1523 int unit;
1524 struct raid_softc *rs;
1525 int do_async;
1526 struct buf *bp;
1527
1528 unit = raidPtr->raidid;
1529 rs = &raid_softc[unit];
1530
1531 /* Check to see if we're at the limit... */
1532 RF_LOCK_MUTEX(raidPtr->mutex);
1533 while (raidPtr->openings > 0) {
1534 RF_UNLOCK_MUTEX(raidPtr->mutex);
1535
1536 /* get the next item, if any, from the queue */
1537 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1538 /* nothing more to do */
1539 return;
1540 }
1541 BUFQ_REMOVE(&rs->buf_queue, bp);
1542
1543 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1544 * partition.. Need to make it absolute to the underlying
1545 * device.. */
1546
1547 blocknum = bp->b_blkno;
1548 if (DISKPART(bp->b_dev) != RAW_PART) {
1549 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1550 blocknum += pp->p_offset;
1551 }
1552
1553 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1554 (int) blocknum));
1555
1556 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1557 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1558
1559 /* *THIS* is where we adjust what block we're going to...
1560 * but DO NOT TOUCH bp->b_blkno!!! */
1561 raid_addr = blocknum;
1562
1563 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1564 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1565 sum = raid_addr + num_blocks + pb;
1566 if (1 || rf_debugKernelAccess) {
1567 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1568 (int) raid_addr, (int) sum, (int) num_blocks,
1569 (int) pb, (int) bp->b_resid));
1570 }
1571 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1572 || (sum < num_blocks) || (sum < pb)) {
1573 bp->b_error = ENOSPC;
1574 bp->b_flags |= B_ERROR;
1575 bp->b_resid = bp->b_bcount;
1576 biodone(bp);
1577 RF_LOCK_MUTEX(raidPtr->mutex);
1578 continue;
1579 }
1580 /*
1581 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1582 */
1583
1584 if (bp->b_bcount & raidPtr->sectorMask) {
1585 bp->b_error = EINVAL;
1586 bp->b_flags |= B_ERROR;
1587 bp->b_resid = bp->b_bcount;
1588 biodone(bp);
1589 RF_LOCK_MUTEX(raidPtr->mutex);
1590 continue;
1591
1592 }
1593 db1_printf(("Calling DoAccess..\n"));
1594
1595
1596 RF_LOCK_MUTEX(raidPtr->mutex);
1597 raidPtr->openings--;
1598 RF_UNLOCK_MUTEX(raidPtr->mutex);
1599
1600 /*
1601 * Everything is async.
1602 */
1603 do_async = 1;
1604
1605 /* don't ever condition on bp->b_flags & B_WRITE.
1606 * always condition on B_READ instead */
1607
1608 /* XXX we're still at splbio() here... do we *really*
1609 need to be? */
1610
1611
1612 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1613 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1614 do_async, raid_addr, num_blocks,
1615 bp->b_un.b_addr, bp, NULL, NULL,
1616 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1617
1618
1619 RF_LOCK_MUTEX(raidPtr->mutex);
1620 }
1621 RF_UNLOCK_MUTEX(raidPtr->mutex);
1622 }
1623
1624
1625
1626
1627 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
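/*
 * Each request is wrapped in a struct raidbuf taken from the per-unit
 * pool; the embedded buf is initialized with InitBP() and handed to the
 * component driver via VOP_STRATEGY(), with KernelWakeupFunc() as the
 * completion callback.  NOP requests just bump the queue count and call
 * the callback directly.
 */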
1628
1629 int
1630 rf_DispatchKernelIO(queue, req)
1631 RF_DiskQueue_t *queue;
1632 RF_DiskQueueData_t *req;
1633 {
1634 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1635 struct buf *bp;
1636 struct raidbuf *raidbp = NULL;
1637 struct raid_softc *rs;
1638 int unit;
1639 int s;
1640
1641 s=0;
1642 /* s = splbio();*/ /* want to test this */
1643 /* XXX along with the vnode, we also need the softc associated with
1644 * this device.. */
1645
1646 req->queue = queue;
1647
1648 unit = queue->raidPtr->raidid;
1649
1650 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1651
1652 if (unit >= numraid) {
1653 printf("Invalid unit number: %d %d\n", unit, numraid);
1654 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1655 }
1656 rs = &raid_softc[unit];
1657
1658 /* XXX is this the right place? */
1659 disk_busy(&rs->sc_dkdev);
1660
1661 bp = req->bp;
1662 #if 1
1663 /* XXX when there is a physical disk failure, someone is passing us a
1664 * buffer that contains old stuff!! Attempt to deal with this problem
1665 * without taking a performance hit... (not sure where the real bug
1666 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1667
1668 if (bp->b_flags & B_ERROR) {
1669 bp->b_flags &= ~B_ERROR;
1670 }
1671 if (bp->b_error != 0) {
1672 bp->b_error = 0;
1673 }
1674 #endif
1675 raidbp = RAIDGETBUF(rs);
1676
1677 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1678
1679 /*
1680 * context for raidiodone
1681 */
1682 raidbp->rf_obp = bp;
1683 raidbp->req = req;
1684
1685 LIST_INIT(&raidbp->rf_buf.b_dep);
1686
1687 switch (req->type) {
1688 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1689 /* XXX need to do something extra here.. */
1690 /* I'm leaving this in, as I've never actually seen it used,
1691 * and I'd like folks to report it... GO */
1692 printf(("WAKEUP CALLED\n"));
1693 queue->numOutstanding++;
1694
1695 /* XXX need to glue the original buffer into this?? */
1696
1697 KernelWakeupFunc(&raidbp->rf_buf);
1698 break;
1699
1700 case RF_IO_TYPE_READ:
1701 case RF_IO_TYPE_WRITE:
1702
1703 if (req->tracerec) {
1704 RF_ETIMER_START(req->tracerec->timer);
1705 }
1706 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1707 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1708 req->sectorOffset, req->numSector,
1709 req->buf, KernelWakeupFunc, (void *) req,
1710 queue->raidPtr->logBytesPerSector, req->b_proc);
1711
1712 if (rf_debugKernelAccess) {
1713 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1714 (long) bp->b_blkno));
1715 }
1716 queue->numOutstanding++;
1717 queue->last_deq_sector = req->sectorOffset;
1718 /* acc wouldn't have been let in if there were any pending
1719 * reqs at any other priority */
1720 queue->curPriority = req->priority;
1721
1722 db1_printf(("Going for %c to unit %d row %d col %d\n",
1723 req->type, unit, queue->row, queue->col));
1724 db1_printf(("sector %d count %d (%d bytes) %d\n",
1725 (int) req->sectorOffset, (int) req->numSector,
1726 (int) (req->numSector <<
1727 queue->raidPtr->logBytesPerSector),
1728 (int) queue->raidPtr->logBytesPerSector));
1729 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1730 raidbp->rf_buf.b_vp->v_numoutput++;
1731 }
1732 VOP_STRATEGY(&raidbp->rf_buf);
1733
1734 break;
1735
1736 default:
1737 panic("bad req->type in rf_DispatchKernelIO");
1738 }
1739 db1_printf(("Exiting from DispatchKernelIO\n"));
1740 /* splx(s); */ /* want to test this */
1741 return (0);
1742 }
  1743 /* this is the callback function associated with an I/O invoked from
1744 kernel code.
1745 */
1746 static void
1747 KernelWakeupFunc(vbp)
1748 struct buf *vbp;
1749 {
1750 RF_DiskQueueData_t *req = NULL;
1751 RF_DiskQueue_t *queue;
1752 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1753 struct buf *bp;
1754 struct raid_softc *rs;
1755 int unit;
1756 register int s;
1757
1758 s = splbio();
1759 db1_printf(("recovering the request queue:\n"));
1760 req = raidbp->req;
1761
1762 bp = raidbp->rf_obp;
1763
1764 queue = (RF_DiskQueue_t *) req->queue;
1765
1766 if (raidbp->rf_buf.b_flags & B_ERROR) {
1767 bp->b_flags |= B_ERROR;
1768 bp->b_error = raidbp->rf_buf.b_error ?
1769 raidbp->rf_buf.b_error : EIO;
1770 }
1771
1772 /* XXX methinks this could be wrong... */
1773 #if 1
1774 bp->b_resid = raidbp->rf_buf.b_resid;
1775 #endif
1776
1777 if (req->tracerec) {
1778 RF_ETIMER_STOP(req->tracerec->timer);
1779 RF_ETIMER_EVAL(req->tracerec->timer);
1780 RF_LOCK_MUTEX(rf_tracing_mutex);
1781 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1782 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1783 req->tracerec->num_phys_ios++;
1784 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1785 }
1786 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1787
1788 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1789
1790
1791 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1792 * ballistic, and mark the component as hosed... */
1793
1794 if (bp->b_flags & B_ERROR) {
1795 /* Mark the disk as dead */
1796 /* but only mark it once... */
1797 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1798 rf_ds_optimal) {
1799 printf("raid%d: IO Error. Marking %s as failed.\n",
1800 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1801 queue->raidPtr->Disks[queue->row][queue->col].status =
1802 rf_ds_failed;
1803 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1804 queue->raidPtr->numFailures++;
1805 /* XXX here we should bump the version number for each component, and write that data out */
1806 } else { /* Disk is already dead... */
1807 /* printf("Disk already marked as dead!\n"); */
1808 }
1809
1810 }
1811
1812 rs = &raid_softc[unit];
1813 RAIDPUTBUF(rs, raidbp);
1814
1815
1816 if (bp->b_resid == 0) {
1817 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1818 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1819 }
1820
1821 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1822 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1823
1824 splx(s);
1825 }
1826
1827
1828
1829 /*
1830 * initialize a buf structure for doing an I/O in the kernel.
1831 */
1832 static void
1833 InitBP(
1834 struct buf * bp,
1835 struct vnode * b_vp,
1836 unsigned rw_flag,
1837 dev_t dev,
1838 RF_SectorNum_t startSect,
1839 RF_SectorCount_t numSect,
1840 caddr_t buf,
1841 void (*cbFunc) (struct buf *),
1842 void *cbArg,
1843 int logBytesPerSector,
1844 struct proc * b_proc)
1845 {
1846 /* bp->b_flags = B_PHYS | rw_flag; */
1847 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1848 bp->b_bcount = numSect << logBytesPerSector;
1849 bp->b_bufsize = bp->b_bcount;
1850 bp->b_error = 0;
1851 bp->b_dev = dev;
1852 bp->b_un.b_addr = buf;
1853 bp->b_blkno = startSect;
1854 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1855 if (bp->b_bcount == 0) {
1856 panic("bp->b_bcount is zero in InitBP!!\n");
1857 }
1858 bp->b_proc = b_proc;
1859 bp->b_iodone = cbFunc;
1860 bp->b_vp = b_vp;
1861
1862 }
1863
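/*
 * raidgetdefaultlabel: fabricate a default disklabel for the set, used
 * both as the starting point for readdisklabel() and for DIOCGDEFLABEL.
 * The geometry is synthetic: one "track" of dataSectorsPerStripe sectors,
 * so cylinders line up with data-stripe boundaries; only the raw
 * partition is defined.
 */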
1864 static void
1865 raidgetdefaultlabel(raidPtr, rs, lp)
1866 RF_Raid_t *raidPtr;
1867 struct raid_softc *rs;
1868 struct disklabel *lp;
1869 {
1870 db1_printf(("Building a default label...\n"));
1871 bzero(lp, sizeof(*lp));
1872
1873 /* fabricate a label... */
1874 lp->d_secperunit = raidPtr->totalSectors;
1875 lp->d_secsize = raidPtr->bytesPerSector;
1876 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1877 lp->d_ntracks = 1;
1878 lp->d_ncylinders = raidPtr->totalSectors /
1879 (lp->d_nsectors * lp->d_ntracks);
1880 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1881
1882 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1883 lp->d_type = DTYPE_RAID;
1884 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1885 lp->d_rpm = 3600;
1886 lp->d_interleave = 1;
1887 lp->d_flags = 0;
1888
1889 lp->d_partitions[RAW_PART].p_offset = 0;
1890 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1891 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1892 lp->d_npartitions = RAW_PART + 1;
1893
1894 lp->d_magic = DISKMAGIC;
1895 lp->d_magic2 = DISKMAGIC;
1896 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1897
1898 }
1899 /*
1900 * Read the disklabel from the raid device. If one is not present, fake one
1901 * up.
1902 */
1903 static void
1904 raidgetdisklabel(dev)
1905 dev_t dev;
1906 {
1907 int unit = raidunit(dev);
1908 struct raid_softc *rs = &raid_softc[unit];
1909 char *errstring;
1910 struct disklabel *lp = rs->sc_dkdev.dk_label;
1911 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1912 RF_Raid_t *raidPtr;
1913
1914 db1_printf(("Getting the disklabel...\n"));
1915
1916 bzero(clp, sizeof(*clp));
1917
1918 raidPtr = raidPtrs[unit];
1919
1920 raidgetdefaultlabel(raidPtr, rs, lp);
1921
1922 /*
1923 * Call the generic disklabel extraction routine.
1924 */
1925 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1926 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1927 if (errstring)
1928 raidmakedisklabel(rs);
1929 else {
1930 int i;
1931 struct partition *pp;
1932
1933 /*
1934 * Sanity check whether the found disklabel is valid.
1935 *
1936 * This is necessary since the total size of the raid device
1937 * may vary when the interleave is changed even though exactly
1938 * the same components are used, and a stale disklabel may be
1939 * used if one is found.
1940 */
1941 if (lp->d_secperunit != rs->sc_size)
1942 printf("WARNING: %s: "
1943 "total sector size in disklabel (%d) != "
1944 "the size of raid (%ld)\n", rs->sc_xname,
1945 lp->d_secperunit, (long) rs->sc_size);
1946 for (i = 0; i < lp->d_npartitions; i++) {
1947 pp = &lp->d_partitions[i];
1948 if (pp->p_offset + pp->p_size > rs->sc_size)
1949 printf("WARNING: %s: end of partition `%c' "
1950 "exceeds the size of raid (%ld)\n",
1951 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1952 }
1953 }
1954
1955 }
1956 /*
1957 * Take care of things one might want to take care of in the event
1958 * that a disklabel isn't present.
1959 */
1960 static void
1961 raidmakedisklabel(rs)
1962 struct raid_softc *rs;
1963 {
1964 struct disklabel *lp = rs->sc_dkdev.dk_label;
1965 db1_printf(("Making a label..\n"));
1966
1967 /*
1968 * For historical reasons, if there's no disklabel present
1969 * the raw partition must be marked FS_BSDFFS.
1970 */
1971
1972 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1973
1974 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1975
1976 lp->d_checksum = dkcksum(lp);
1977 }
1978 /*
1979 * Lookup the provided name in the filesystem. If the file exists,
1980 * is a valid block device, and isn't being used by anyone else,
1981 * set *vpp to the file's vnode.
1982 * You'll find the original of this in ccd.c
1983 */
1984 int
1985 raidlookup(path, p, vpp)
1986 char *path;
1987 struct proc *p;
1988 struct vnode **vpp; /* result */
1989 {
1990 struct nameidata nd;
1991 struct vnode *vp;
1992 struct vattr va;
1993 int error;
1994
1995 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1996 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1997 #ifdef DEBUG
1998 printf("RAIDframe: vn_open returned %d\n", error);
1999 #endif
2000 return (error);
2001 }
2002 vp = nd.ni_vp;
2003 if (vp->v_usecount > 1) {
2004 VOP_UNLOCK(vp, 0);
2005 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2006 return (EBUSY);
2007 }
2008 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2009 VOP_UNLOCK(vp, 0);
2010 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2011 return (error);
2012 }
2013 /* XXX: eventually we should handle VREG, too. */
2014 if (va.va_type != VBLK) {
2015 VOP_UNLOCK(vp, 0);
2016 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2017 return (ENOTBLK);
2018 }
2019 VOP_UNLOCK(vp, 0);
2020 *vpp = vp;
2021 return (0);
2022 }
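/*
 * Illustrative use of raidlookup() (sketch only; component_name, p and vp
 * are hypothetical): on success the vnode comes back unlocked and opened
 * FREAD|FWRITE, so the matching teardown is a vn_close() with the same
 * flags, exactly as in the error paths above.
 */
#if 0
	if ((error = raidlookup(component_name, p, &vp)) != 0)
		return (error);
	/* ... use vp as the backing vnode for one component ... */
	(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
#endif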
2023 /*
2024 * Wait interruptibly for an exclusive lock.
2025 *
2026 * XXX
2027 * Several drivers do this; it should be abstracted and made MP-safe.
2028 * (Hmm... where have we seen this warning before :-> GO )
2029 */
2030 static int
2031 raidlock(rs)
2032 struct raid_softc *rs;
2033 {
2034 int error;
2035
2036 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2037 rs->sc_flags |= RAIDF_WANTED;
2038 if ((error =
2039 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2040 return (error);
2041 }
2042 rs->sc_flags |= RAIDF_LOCKED;
2043 return (0);
2044 }
2045 /*
2046 * Unlock and wake up any waiters.
2047 */
2048 static void
2049 raidunlock(rs)
2050 struct raid_softc *rs;
2051 {
2052
2053 rs->sc_flags &= ~RAIDF_LOCKED;
2054 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2055 rs->sc_flags &= ~RAIDF_WANTED;
2056 wakeup(rs);
2057 }
2058 }
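/*
 * Typical pairing of the two routines above (sketch only): because
 * raidlock() sleeps with PCATCH, it can return non-zero when the sleep
 * is interrupted by a signal, so callers must not assume they hold the
 * lock unless it returned 0.
 */
#if 0
	if ((error = raidlock(rs)) != 0)
		return (error);
	/* ... exclusive access to the unit ... */
	raidunlock(rs);
#endif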
2059
2060
2061 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2062 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
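/*
 * The component label lives RF_COMPONENT_INFO_OFFSET bytes (16 KB) into
 * each component and occupies a single RF_COMPONENT_INFO_SIZE (1 KB)
 * chunk; raidread_component_label() and raidwrite_component_label()
 * below convert that byte offset into a DEV_BSIZE block number when
 * they build their bufs.
 */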
2063
2064 int
2065 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2066 {
2067 RF_ComponentLabel_t clabel;
2068 raidread_component_label(dev, b_vp, &clabel);
2069 clabel.mod_counter = mod_counter;
2070 clabel.clean = RF_RAID_CLEAN;
2071 raidwrite_component_label(dev, b_vp, &clabel);
2072 return(0);
2073 }
2074
2075
2076 int
2077 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2078 {
2079 RF_ComponentLabel_t clabel;
2080 raidread_component_label(dev, b_vp, &clabel);
2081 clabel.mod_counter = mod_counter;
2082 clabel.clean = RF_RAID_DIRTY;
2083 raidwrite_component_label(dev, b_vp, &clabel);
2084 return(0);
2085 }
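/*
 * These two helpers are used further down: rf_markalldirty() calls
 * raidmarkdirty() on every live component when an array goes into
 * service, and rf_update_component_labels() calls raidmarkclean() once
 * parity is known good (parity_good == RF_RAID_CLEAN).
 */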
2086
2087 /* ARGSUSED */
2088 int
2089 raidread_component_label(dev, b_vp, clabel)
2090 dev_t dev;
2091 struct vnode *b_vp;
2092 RF_ComponentLabel_t *clabel;
2093 {
2094 struct buf *bp;
2095 int error;
2096
2097 /* XXX should probably ensure that we don't try to do this if
2098 someone has changed rf_protected_sectors. */
2099
2100 /* get a block of the appropriate size... */
2101 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2102 bp->b_dev = dev;
2103
2104 /* get our ducks in a row for the read */
2105 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2106 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2107 bp->b_flags = B_BUSY | B_READ;
2108 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2109
2110 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2111
2112 error = biowait(bp);
2113
2114 if (!error) {
2115 memcpy(clabel, bp->b_un.b_addr,
2116 sizeof(RF_ComponentLabel_t));
2117 #if 0
2118 print_component_label( clabel );
2119 #endif
2120 } else {
2121 #if 0
2122 printf("Failed to read RAID component label!\n");
2123 #endif
2124 }
2125
2126 bp->b_flags = B_INVAL | B_AGE;
2127 brelse(bp);
2128 return(error);
2129 }
2130 /* ARGSUSED */
2131 int
2132 raidwrite_component_label(dev, b_vp, clabel)
2133 dev_t dev;
2134 struct vnode *b_vp;
2135 RF_ComponentLabel_t *clabel;
2136 {
2137 struct buf *bp;
2138 int error;
2139
2140 /* get a block of the appropriate size... */
2141 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2142 bp->b_dev = dev;
2143
2144 /* get our ducks in a row for the write */
2145 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2146 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2147 bp->b_flags = B_BUSY | B_WRITE;
2148 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2149
2150 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2151
2152 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2153
2154 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2155 error = biowait(bp);
2156 bp->b_flags = B_INVAL | B_AGE;
2157 brelse(bp);
2158 if (error) {
2159 #if 1
2160 printf("Failed to write RAID component info!\n");
2161 #endif
2162 }
2163
2164 return(error);
2165 }
2166
2167 void
2168 rf_markalldirty( raidPtr )
2169 RF_Raid_t *raidPtr;
2170 {
2171 RF_ComponentLabel_t clabel;
2172 int r,c;
2173
2174 raidPtr->mod_counter++;
2175 for (r = 0; r < raidPtr->numRow; r++) {
2176 for (c = 0; c < raidPtr->numCol; c++) {
2177 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2178 raidread_component_label(
2179 raidPtr->Disks[r][c].dev,
2180 raidPtr->raid_cinfo[r][c].ci_vp,
2181 &clabel);
2182 if (clabel.status == rf_ds_spared) {
2183 /* XXX do something special...
2184 but whatever you do, don't
2185 try to access it!! */
2186 } else {
2187 #if 0
2188 clabel.status =
2189 raidPtr->Disks[r][c].status;
2190 raidwrite_component_label(
2191 raidPtr->Disks[r][c].dev,
2192 raidPtr->raid_cinfo[r][c].ci_vp,
2193 &clabel);
2194 #endif
2195 raidmarkdirty(
2196 raidPtr->Disks[r][c].dev,
2197 raidPtr->raid_cinfo[r][c].ci_vp,
2198 raidPtr->mod_counter);
2199 }
2200 }
2201 }
2202 }
2203 /* printf("Component labels marked dirty.\n"); */
2204 #if 0
2205 for( c = 0; c < raidPtr->numSpare ; c++) {
2206 sparecol = raidPtr->numCol + c;
2207 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2208 /*
2209
2210 XXX this is where we get fancy and map this spare
2211 into its correct spot in the array.
2212
2213 */
2214 /*
2215
2216 we claim this disk is "optimal" if it's
2217 rf_ds_used_spare, as that means it should be
2218 directly substitutable for the disk it replaced.
2219 We note that too...
2220
2221 */
2222
2223 for(i=0;i<raidPtr->numRow;i++) {
2224 for(j=0;j<raidPtr->numCol;j++) {
2225 if ((raidPtr->Disks[i][j].spareRow ==
2226 r) &&
2227 (raidPtr->Disks[i][j].spareCol ==
2228 sparecol)) {
2229 srow = r;
2230 scol = sparecol;
2231 break;
2232 }
2233 }
2234 }
2235
2236 raidread_component_label(
2237 raidPtr->Disks[r][sparecol].dev,
2238 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2239 &clabel);
2240 /* make sure status is noted */
2241 clabel.version = RF_COMPONENT_LABEL_VERSION;
2242 clabel.mod_counter = raidPtr->mod_counter;
2243 clabel.serial_number = raidPtr->serial_number;
2244 clabel.row = srow;
2245 clabel.column = scol;
2246 clabel.num_rows = raidPtr->numRow;
2247 clabel.num_columns = raidPtr->numCol;
2248 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2249 clabel.status = rf_ds_optimal;
2250 raidwrite_component_label(
2251 raidPtr->Disks[r][sparecol].dev,
2252 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2253 &clabel);
2254 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2255 raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2256 }
2257 }
2258
2259 #endif
2260 }
2261
2262
2263 void
2264 rf_update_component_labels( raidPtr )
2265 RF_Raid_t *raidPtr;
2266 {
2267 RF_ComponentLabel_t clabel;
2268 int sparecol;
2269 int r,c;
2270 int i,j;
2271 int srow, scol;
2272
2273 srow = -1;
2274 scol = -1;
2275
2276 /* XXX should do extra checks to make sure things really are clean,
2277 rather than blindly setting the clean bit... */
2278
2279 raidPtr->mod_counter++;
2280
2281 for (r = 0; r < raidPtr->numRow; r++) {
2282 for (c = 0; c < raidPtr->numCol; c++) {
2283 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2284 raidread_component_label(
2285 raidPtr->Disks[r][c].dev,
2286 raidPtr->raid_cinfo[r][c].ci_vp,
2287 &clabel);
2288 /* make sure status is noted */
2289 clabel.status = rf_ds_optimal;
2290 raidwrite_component_label(
2291 raidPtr->Disks[r][c].dev,
2292 raidPtr->raid_cinfo[r][c].ci_vp,
2293 &clabel);
2294 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2295 raidmarkclean(
2296 raidPtr->Disks[r][c].dev,
2297 raidPtr->raid_cinfo[r][c].ci_vp,
2298 raidPtr->mod_counter);
2299 }
2300 }
2301 /* else we don't touch it.. */
2302 #if 0
2303 else if (raidPtr->Disks[r][c].status !=
2304 rf_ds_failed) {
2305 raidread_component_label(
2306 raidPtr->Disks[r][c].dev,
2307 raidPtr->raid_cinfo[r][c].ci_vp,
2308 &clabel);
2309 /* make sure status is noted */
2310 clabel.status =
2311 raidPtr->Disks[r][c].status;
2312 raidwrite_component_label(
2313 raidPtr->Disks[r][c].dev,
2314 raidPtr->raid_cinfo[r][c].ci_vp,
2315 &clabel);
2316 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2317 raidmarkclean(
2318 raidPtr->Disks[r][c].dev,
2319 raidPtr->raid_cinfo[r][c].ci_vp,
2320 raidPtr->mod_counter);
2321 }
2322 }
2323 #endif
2324 }
2325 }
2326
2327 for( c = 0; c < raidPtr->numSpare ; c++) {
2328 sparecol = raidPtr->numCol + c;
2329 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2330 /*
2331
2332 we claim this disk is "optimal" if it's
2333 rf_ds_used_spare, as that means it should be
2334 directly substitutable for the disk it replaced.
2335 We note that too...
2336
2337 */
2338
2339 for(i=0;i<raidPtr->numRow;i++) {
2340 for(j=0;j<raidPtr->numCol;j++) {
2341 if ((raidPtr->Disks[i][j].spareRow ==
2342 0) &&
2343 (raidPtr->Disks[i][j].spareCol ==
2344 sparecol)) {
2345 srow = i;
2346 scol = j;
2347 break;
2348 }
2349 }
2350 }
2351
2352 raidread_component_label(
2353 raidPtr->Disks[0][sparecol].dev,
2354 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2355 &clabel);
2356 /* make sure status is noted */
2357 clabel.version = RF_COMPONENT_LABEL_VERSION;
2358 clabel.mod_counter = raidPtr->mod_counter;
2359 clabel.serial_number = raidPtr->serial_number;
2360 clabel.row = srow;
2361 clabel.column = scol;
2362 clabel.num_rows = raidPtr->numRow;
2363 clabel.num_columns = raidPtr->numCol;
2364 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2365 clabel.status = rf_ds_optimal;
2366 raidwrite_component_label(
2367 raidPtr->Disks[0][sparecol].dev,
2368 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2369 &clabel);
2370 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2371 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2372 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2373 raidPtr->mod_counter);
2374 }
2375 }
2376 }
2377 /* printf("Component labels updated\n"); */
2378 }
2379
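/*
 * rf_ReconThread(), rf_RewriteParityThread(), rf_CopybackThread() and
 * rf_ReconstructInPlaceThread() below run as kernel threads, each
 * wrapping a single long-running RAIDframe operation; they flag the
 * operation as in progress on the RF_Raid_t and must finish with
 * kthread_exit() rather than returning.
 */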
2380 void
2381 rf_ReconThread(req)
2382 struct rf_recon_req *req;
2383 {
2384 int s;
2385 RF_Raid_t *raidPtr;
2386
2387 s = splbio();
2388 raidPtr = (RF_Raid_t *) req->raidPtr;
2389 raidPtr->recon_in_progress = 1;
2390
2391 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2392 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2393
2394 /* XXX get rid of this! we don't need it at all.. */
2395 RF_Free(req, sizeof(*req));
2396
2397 raidPtr->recon_in_progress = 0;
2398 splx(s);
2399
2400 /* That's all... */
2401 kthread_exit(0); /* does not return */
2402 }
2403
2404 void
2405 rf_RewriteParityThread(raidPtr)
2406 RF_Raid_t *raidPtr;
2407 {
2408 int retcode;
2409 int s;
2410
2411 raidPtr->parity_rewrite_in_progress = 1;
2412 s = splbio();
2413 retcode = rf_RewriteParity(raidPtr);
2414 splx(s);
2415 if (retcode) {
2416 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2417 } else {
2418 /* set the clean bit!  If we shut down correctly,
2419 the clean bit on each component label will get
2420 set */
2421 raidPtr->parity_good = RF_RAID_CLEAN;
2422 }
2423 raidPtr->parity_rewrite_in_progress = 0;
2424
2425 /* That's all... */
2426 kthread_exit(0); /* does not return */
2427 }
2428
2429
2430 void
2431 rf_CopybackThread(raidPtr)
2432 RF_Raid_t *raidPtr;
2433 {
2434 int s;
2435
2436 raidPtr->copyback_in_progress = 1;
2437 s = splbio();
2438 rf_CopybackReconstructedData(raidPtr);
2439 splx(s);
2440 raidPtr->copyback_in_progress = 0;
2441
2442 /* That's all... */
2443 kthread_exit(0); /* does not return */
2444 }
2445
2446
2447 void
2448 rf_ReconstructInPlaceThread(req)
2449 struct rf_recon_req *req;
2450 {
2451 int retcode;
2452 int s;
2453 RF_Raid_t *raidPtr;
2454
2455 s = splbio();
2456 raidPtr = req->raidPtr;
2457 raidPtr->recon_in_progress = 1;
2458 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2459 RF_Free(req, sizeof(*req));
2460 raidPtr->recon_in_progress = 0;
2461 splx(s);
2462
2463 /* That's all... */
2464 kthread_exit(0); /* does not return */
2465 }
2466
2467 void
2468 rf_mountroot_hook(dev)
2469 struct device *dev;
2470 {
2471 #if 1
2472 printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
2473 #endif
2474 if (boothowto & RB_ASKNAME) {
2475 /* We don't auto-config... */
2476 } else {
2477 /* They didn't ask, and we found something bootable... */
2478 /* XXX pretend for now.. */
2479 if (raidautoconfig) {
2480 rootspec = raid_rooty;
2481 }
2482 }
2483 }
2484
2485
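/*
 * Autoconfiguration probe: walk every device on the system, skip anything
 * that isn't a disk (and floppies), open the raw partition to fetch its
 * disklabel, and then, for each partition marked FS_RAID, read the
 * component label and keep it on the RF_AutoConfig_t list if it passes
 * rf_reasonable_label() and its size matches the partition size.
 */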
2486 RF_AutoConfig_t *
2487 rf_find_raid_components()
2488 {
2489 struct devnametobdevmaj *dtobdm;
2490 struct vnode *vp;
2491 struct disklabel label;
2492 struct device *dv;
2493 char *cd_name;
2494 dev_t dev;
2495 int error;
2496 int i;
2497 int good_one;
2498 RF_ComponentLabel_t *clabel;
2499 RF_AutoConfig_t *ac_list;
2500 RF_AutoConfig_t *ac;
2501
2502
2503 /* initialize the AutoConfig list */
2504 ac_list = NULL;
2505
2506 if (raidautoconfig) {
2507
2508 /* we begin by trolling through *all* the devices on the system */
2509
2510 for (dv = alldevs.tqh_first; dv != NULL;
2511 dv = dv->dv_list.tqe_next) {
2512
2513 /* we are only interested in disks... */
2514 if (dv->dv_class != DV_DISK)
2515 continue;
2516
2517 /* we don't care about floppies... */
2518 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2519 continue;
2520 }
2521
2522 /* need to find the device_name_to_block_device_major stuff */
2523 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2524 dtobdm = dev_name2blk;
2525 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2526 dtobdm++;
2527 }
2528
2529 /* get a vnode for the raw partition of this disk */
2530
2531 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2532 if (bdevvp(dev, &vp))
2533 panic("RAID can't alloc vnode");
2534
2535 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2536
2537 if (error) {
2538 /* "Who cares." Continue looking
2539 for something that exists*/
2540 vput(vp);
2541 continue;
2542 }
2543
2544 /* Ok, the disk exists. Go get the disklabel. */
2545 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2546 FREAD, NOCRED, 0);
2547 if (error) {
2548 /*
2549 * XXX can't happen - open() would
2550 * have errored out (or faked up one)
2551 */
2552 printf("can't get label for dev %s%c (%d)!?!?\n",
2553 dv->dv_xname, 'a' + RAW_PART, error);
2554 }
2555
2556 /* don't need this any more. We'll allocate it again
2557 a little later if we really do... */
2558 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2559 vput(vp);
2560
2561 for (i=0; i < label.d_npartitions; i++) {
2562 /* We only support partitions marked as RAID */
2563 if (label.d_partitions[i].p_fstype != FS_RAID)
2564 continue;
2565
2566 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2567 if (bdevvp(dev, &vp))
2568 panic("RAID can't alloc vnode");
2569
2570 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2571 if (error) {
2572 /* Whatever... */
2573 vput(vp);
2574 continue;
2575 }
2576
2577 good_one = 0;
2578
2579 clabel = (RF_ComponentLabel_t *)
2580 malloc(sizeof(RF_ComponentLabel_t),
2581 M_RAIDFRAME, M_NOWAIT);
2582 if (clabel == NULL) {
2583 /* XXX CLEANUP HERE */
2584 printf("RAID auto config: out of memory!\n");
2585 return(NULL); /* XXX probably should panic? */
2586 }
2587
2588 if (!raidread_component_label(dev, vp, clabel)) {
2589 /* Got the label. Does it look reasonable? */
2590 if (rf_reasonable_label(clabel) &&
2591 (clabel->partitionSize ==
2592 label.d_partitions[i].p_size)) {
2593 #if DEBUG
2594 printf("Component on: %s%c: %d\n",
2595 dv->dv_xname, 'a'+i,
2596 label.d_partitions[i].p_size);
2597 print_component_label(clabel);
2598 #endif
2599 /* if it's reasonable, add it,
2600 else ignore it. */
2601 ac = (RF_AutoConfig_t *)
2602 malloc(sizeof(RF_AutoConfig_t),
2603 M_RAIDFRAME,
2604 M_NOWAIT);
2605 if (ac == NULL) {
2606 /* XXX should panic?? */
2607 return(NULL);
2608 }
2609
2610 sprintf(ac->devname, "%s%c",
2611 dv->dv_xname, 'a'+i);
2612 ac->dev = dev;
2613 ac->vp = vp;
2614 ac->clabel = clabel;
2615 ac->next = ac_list;
2616 ac_list = ac;
2617 good_one = 1;
2618 }
2619 }
2620 if (!good_one) {
2621 /* cleanup */
2622 free(clabel, M_RAIDFRAME);
2623 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2624 vput(vp);
2625 }
2626 }
2627 }
2628 }
2629 return(ac_list);
2630 }
2631
2632 static int
2633 rf_reasonable_label(clabel)
2634 RF_ComponentLabel_t *clabel;
2635 {
2636
2637 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2638 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2639 ((clabel->clean == RF_RAID_CLEAN) ||
2640 (clabel->clean == RF_RAID_DIRTY)) &&
2641 clabel->row >=0 &&
2642 clabel->column >= 0 &&
2643 clabel->num_rows > 0 &&
2644 clabel->num_columns > 0 &&
2645 clabel->row < clabel->num_rows &&
2646 clabel->column < clabel->num_columns &&
2647 clabel->blockSize > 0 &&
2648 clabel->numBlocks > 0) {
2649 /* label looks reasonable enough... */
2650 return(1);
2651 }
2652 return(0);
2653 }
2654
2655
2656 void
2657 print_component_label(clabel)
2658 RF_ComponentLabel_t *clabel;
2659 {
2660 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2661 clabel->row, clabel->column,
2662 clabel->num_rows, clabel->num_columns);
2663 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2664 clabel->version, clabel->serial_number,
2665 clabel->mod_counter);
2666 printf(" Clean: %s Status: %d\n",
2667 clabel->clean ? "Yes" : "No", clabel->status );
2668 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2669 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2670 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2671 (char) clabel->parityConfig, clabel->blockSize,
2672 clabel->numBlocks);
2673 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2674 printf(" Last configured as: raid%d\n", clabel->last_unit );
2675 printf(" Config order: %d\n", clabel->config_order);
2676
2677 }
2678
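/*
 * Group the discovered components into configuration sets.  Each
 * component is appended to the first existing set whose first member it
 * matches according to rf_does_it_fit(); if none matches, a new set is
 * started.  The per-set component lists are threaded through ac->next,
 * which is why the original list links are not preserved.
 */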
2679 RF_ConfigSet_t *
2680 rf_create_auto_sets(ac_list)
2681 RF_AutoConfig_t *ac_list;
2682 {
2683 RF_AutoConfig_t *ac;
2684 RF_ConfigSet_t *config_sets;
2685 RF_ConfigSet_t *cset;
2686 RF_AutoConfig_t *ac_next;
2687
2688
2689 config_sets = NULL;
2690
2691 /* Go through the AutoConfig list, and figure out which components
2692 belong to what sets. */
2693 ac = ac_list;
2694 while(ac!=NULL) {
2695 /* we're going to putz with ac->next, so save it here
2696 for use at the end of the loop */
2697 ac_next = ac->next;
2698
2699 if (config_sets == NULL) {
2700 /* will need at least this one... */
2701 config_sets = (RF_ConfigSet_t *)
2702 malloc(sizeof(RF_ConfigSet_t),
2703 M_RAIDFRAME, M_NOWAIT);
2704 if (config_sets == NULL) {
2705 panic("rf_create_auto_sets: No memory!\n");
2706 }
2707 /* this one is easy :) */
2708 config_sets->ac = ac;
2709 config_sets->next = NULL;
2710 ac->next = NULL;
2711 } else {
2712 /* which set does this component fit into? */
2713 cset = config_sets;
2714 while(cset!=NULL) {
2715 if (rf_does_it_fit(cset, ac)) {
2716 /* looks like it matches */
2717 ac->next = cset->ac;
2718 cset->ac = ac;
2719 break;
2720 }
2721 cset = cset->next;
2722 }
2723 if (cset==NULL) {
2724 /* didn't find a match above... new set..*/
2725 cset = (RF_ConfigSet_t *)
2726 malloc(sizeof(RF_ConfigSet_t),
2727 M_RAIDFRAME, M_NOWAIT);
2728 if (cset == NULL) {
2729 panic("rf_create_auto_sets: No memory!\n");
2730 }
2731 cset->ac = ac;
2732 ac->next = NULL;
2733 cset->next = config_sets;
2734 config_sets = cset;
2735 }
2736 }
2737 ac = ac_next;
2738 }
2739
2740
2741 return(config_sets);
2742 }
2743
2744 static int
2745 rf_does_it_fit(cset, ac)
2746 RF_ConfigSet_t *cset;
2747 RF_AutoConfig_t *ac;
2748 {
2749 RF_ComponentLabel_t *clabel1, *clabel2;
2750
2751 /* If this one matches the *first* one in the set, that's good
2752 enough, since the other members of the set would have been
2753 through here too... */
2754
2755 clabel1 = cset->ac->clabel;
2756 clabel2 = ac->clabel;
2757 if ((clabel1->version == clabel2->version) &&
2758 (clabel1->serial_number == clabel2->serial_number) &&
2759 (clabel1->mod_counter == clabel2->mod_counter) &&
2760 (clabel1->num_rows == clabel2->num_rows) &&
2761 (clabel1->num_columns == clabel2->num_columns) &&
2762 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2763 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2764 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2765 (clabel1->parityConfig == clabel2->parityConfig) &&
2766 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2767 (clabel1->blockSize == clabel2->blockSize) &&
2768 (clabel1->numBlocks == clabel2->numBlocks) &&
2769 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2770 (clabel1->root_partition == clabel2->root_partition) &&
2771 (clabel1->last_unit == clabel2->last_unit) &&
2772 (clabel1->config_order == clabel2->config_order)) {
2773 /* if it gets here, it almost *has* to be a match */
2774 } else {
2775 /* it's not consistent with somebody in the set..
2776 punt */
2777 return(0);
2778 }
2779 /* all was fine.. it must fit... */
2780 return(1);
2781 }
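/*
 * Note that mod_counter must match as well, so a component whose label
 * missed the most recent update (e.g. one that was failed at the time)
 * will not be grouped with its former peers and instead ends up in a
 * set of its own.
 */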
2782
2783 #if 0
2784 int have_enough();
2785 int
2786 have_enough()
2787 {
2788 /* check to see that we have enough 'live' components
2789 of this set. If so, we can configure it if necessary */
2790
2791 }
2792 #endif
2793
2794 void
2795 rf_create_configuration(ac,config,raidPtr)
2796 RF_AutoConfig_t *ac;
2797 RF_Config_t *config;
2798 RF_Raid_t *raidPtr;
2799 {
2800 RF_ComponentLabel_t *clabel;
2801
2802 clabel = ac->clabel;
2803
2804 /* 1. Fill in the common stuff */
2805 config->numRow = clabel->num_rows;
2806 config->numCol = clabel->num_columns;
2807 config->numSpare = 0; /* XXX should this be set here? */
2808 config->sectPerSU = clabel->sectPerSU;
2809 config->SUsPerPU = clabel->SUsPerPU;
2810 config->SUsPerRU = clabel->SUsPerRU;
2811 config->parityConfig = clabel->parityConfig;
2812 /* XXX... */
2813 strcpy(config->diskQueueType,"fifo");
2814 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2815 config->layoutSpecificSize = 0; /* XXX ?? */
2816
2817 while(ac!=NULL) {
2818 /* row/col values will be in range due to the checks
2819 in rf_reasonable_label() */
2820 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2821 ac->devname);
2822 ac = ac->next;
2823 }
2824
2825 }
2826
2827 int
2828 rf_set_autoconfig(raidPtr, new_value)
2829 RF_Raid_t *raidPtr;
2830 int new_value;
2831 {
2832 RF_ComponentLabel_t clabel;
2833 struct vnode *vp;
2834 dev_t dev;
2835 int row, column;
2836
2837 for(row=0; row<raidPtr->numRow; row++) {
2838 for(column=0; column<raidPtr->numCol; column++) {
2839 dev = raidPtr->Disks[row][column].dev;
2840 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2841 raidread_component_label(dev, vp, &clabel);
2842 clabel.autoconfigure = new_value;
2843 raidwrite_component_label(dev, vp, &clabel);
2844 }
2845 }
2846 return(new_value);
2847 }
2848
2849 int
2850 rf_set_rootpartition(raidPtr, new_value)
2851 RF_Raid_t *raidPtr;
2852 int new_value;
2853 {
2854 RF_ComponentLabel_t clabel;
2855 struct vnode *vp;
2856 dev_t dev;
2857 int row, column;
2858
2859 for(row=0; row<raidPtr->numRow; row++) {
2860 for(column=0; column<raidPtr->numCol; column++) {
2861 dev = raidPtr->Disks[row][column].dev;
2862 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2863 raidread_component_label(dev, vp, &clabel);
2864 clabel.root_partition = new_value;
2865 raidwrite_component_label(dev, vp, &clabel);
2866 }
2867 }
2868 return(new_value);
2869 }
2870
2871 void
2872 rf_release_all_vps(cset)
2873 RF_ConfigSet_t *cset;
2874 {
2875 RF_AutoConfig_t *ac;
2876
2877 ac = cset->ac;
2878 while(ac!=NULL) {
2879 /* Close the vp, and give it back */
2880 if (ac->vp) {
2881 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2882 vput(ac->vp);
2883 }
2884 ac = ac->next;
2885 }
2886 }
2887
2888
2889 void
2890 rf_cleanup_config_set(cset)
2891 RF_ConfigSet_t *cset;
2892 {
2893 RF_AutoConfig_t *ac;
2894 RF_AutoConfig_t *next_ac;
2895
2896 ac = cset->ac;
2897 while(ac!=NULL) {
2898 next_ac = ac->next;
2899 /* nuke the label */
2900 free(ac->clabel, M_RAIDFRAME);
2901 /* cleanup the config structure */
2902 free(ac, M_RAIDFRAME);
2903 /* "next.." */
2904 ac = next_ac;
2905 }
2906 /* and, finally, nuke the config set */
2907 free(cset, M_RAIDFRAME);
2908 }
2909
2910
2911 void
2912 raid_init_component_label(raidPtr, clabel)
2913 RF_Raid_t *raidPtr;
2914 RF_ComponentLabel_t *clabel;
2915 {
2916 /* current version number */
2917 clabel->version = RF_COMPONENT_LABEL_VERSION;
2918 clabel->serial_number = raidPtr->serial_number;
2919 clabel->mod_counter = raidPtr->mod_counter;
2920 clabel->num_rows = raidPtr->numRow;
2921 clabel->num_columns = raidPtr->numCol;
2922 clabel->clean = RF_RAID_DIRTY; /* not clean */
2923 clabel->status = rf_ds_optimal; /* "It's good!" */
2924
2925 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
2926 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
2927 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
2928 /* XXX not portable */
2929 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
2930 /* XXX THIS SHOULD BE SET RIGHT!! */
2931 clabel->maxOutstanding = 100;
2932 clabel->autoconfigure = 0;
2933 clabel->root_partition = 0;
2934 clabel->last_unit = raidPtr->raidid;
2935 }
2936