/*	$NetBSD: rf_netbsdkintf.c,v 1.30 1999/11/17 01:16:37 oster Exp $	*/
/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *      @(#)cd.c        8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/errno.h>
#include <sys/param.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/types.h>
#include <machine/types.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/buf.h>
#include <sys/user.h>

#include "raid.h"
#include "rf_raid.h"
#include "rf_raidframe.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_diskqueue.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_debugMem.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_debugprint.h"
#include "rf_threadstuff.h"

int     rf_kdebug_level = 0;

#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a)  if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else                           /* DEBUG */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif                          /* DEBUG */
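
/*
 * Note on usage: the db[0-5]_printf() macros take a complete,
 * parenthesized argument list, so calls use double parentheses,
 * as elsewhere in this file:
 *
 *      db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
 *
 * This keeps variadic debug output portable to compilers that lack
 * variadic macros.
 */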

static RF_Raid_t **raidPtrs;    /* global raid device descriptors */

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;   /* requests to install a
                                                 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;   /* responses from
                                                 * installation process */

static struct rf_recon_req *recon_queue = NULL; /* used to communicate
                                                 * reconstruction
                                                 * requests */

decl_simple_lock_data(, recon_queue_mutex)
#define LOCK_RECON_Q_MUTEX()   simple_lock(&recon_queue_mutex)
#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
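
/*
 * Queue protocol: raidioctl(RAIDFRAME_FAIL_DISK) pushes a copy of the
 * request onto recon_queue under recon_queue_mutex and wakeup()s
 * &recon_queue; rf_ReconKernelThread() (below) pops requests off and
 * services them, tsleep()ing on &recon_queue when it runs dry.
 */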

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
                   dev_t dev, RF_SectorNum_t startSect,
                   RF_SectorCount_t numSect, caddr_t buf,
                   void (*cbFunc) (struct buf *), void *cbArg,
                   int logBytesPerSector, struct proc * b_proc);

#define Dprintf0(s)         if (rf_queueDebug) \
        rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf1(s,a)       if (rf_queueDebug) \
        rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b)     if (rf_queueDebug) \
        rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c)   if (rf_queueDebug) \
        rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)

int     raidmarkclean(dev_t dev, struct vnode *b_vp, int);
int     raidmarkdirty(dev_t dev, struct vnode *b_vp, int);

void    raidattach __P((int));
int     raidsize __P((dev_t));

void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
static int raidinit __P((dev_t, RF_Raid_t *, int));

int     raidopen __P((dev_t, int, int, struct proc *));
int     raidclose __P((dev_t, int, int, struct proc *));
int     raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int     raidwrite __P((dev_t, struct uio *, int));
int     raidread __P((dev_t, struct uio *, int));
void    raidstrategy __P((struct buf *));
int     raiddump __P((dev_t, daddr_t, caddr_t, size_t));

int     raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
int     raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
void    rf_update_component_labels( RF_Raid_t *);

/*
 * Pilfered from ccd.c
 */

struct raidbuf {
        struct buf rf_buf;      /* new I/O buf.  MUST BE FIRST!!! */
        struct buf *rf_obp;     /* ptr. to original I/O buf */
        int     rf_flags;       /* misc. flags */
        RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
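
/*
 * Layout note: rf_buf must remain the first member of struct raidbuf.
 * KernelWakeupFunc() is handed the embedded struct buf * by the I/O
 * completion path and casts it straight back to a struct raidbuf *,
 * which is only valid while the two share a starting address.
 */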

#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

struct raid_softc {
        int     sc_flags;       /* flags */
        int     sc_cflags;      /* configuration flags */
        size_t  sc_size;        /* size of the raid device */
        dev_t   sc_dev;         /* our device.. */
        char    sc_xname[20];   /* XXX external name */
        struct disk sc_dkdev;   /* generic disk device info */
        struct pool sc_cbufpool;        /* component buffer pool */
};
/* sc_flags */
#define RAIDF_INITED    0x01    /* unit has been initialized */
#define RAIDF_WLABEL    0x02    /* label area is writable */
#define RAIDF_LABELLING 0x04    /* unit is currently being labelled */
#define RAIDF_WANTED    0x40    /* someone is waiting to obtain a lock */
#define RAIDF_LOCKED    0x80    /* unit is locked */

#define raidunit(x)     DISKUNIT(x)
static int numraid = 0;

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif
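
/*
 * A sketch of how one might raise this limit (assuming RAIDOUTSTANDING
 * is plumbed through as a kernel option; failing that, it can be
 * defined on the compiler command line):
 *
 *      options         RAIDOUTSTANDING=10
 *
 * which would permit 10 simultaneous I/Os per RAID device, at the
 * memory cost estimated above.
 */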

#define RAIDLABELDEV(dev)       \
        (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
                                     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));
int raidlookup __P((char *, struct proc * p, struct vnode **));

static void rf_markalldirty __P((RF_Raid_t *));

void
raidattach(num)
        int     num;
{
        int     raidID;
        int     i, rc;

#ifdef DEBUG
        printf("raidattach: Asked for %d units\n", num);
#endif

        if (num <= 0) {
#ifdef DIAGNOSTIC
                panic("raidattach: count <= 0");
#endif
                return;
        }
        /* This is where all the initialization stuff gets done. */

        /* Make some space for requested number of units... */

        RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
        if (raidPtrs == NULL) {
                panic("raidPtrs is NULL!!\n");
        }

        rc = rf_mutex_init(&rf_sparet_wait_mutex);
        if (rc) {
                RF_PANIC();
        }

        rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
        recon_queue = NULL;

        for (i = 0; i < num; i++)
                raidPtrs[i] = NULL;
        rc = rf_BootRaidframe();
        if (rc == 0)
                printf("Kernelized RAIDframe activated\n");
        else
                panic("Serious error booting RAID!!\n");

        /* put together some datastructures like the CCD device does.. This
         * lets us lock the device and what-not when it gets opened. */

        raid_softc = (struct raid_softc *)
            malloc(num * sizeof(struct raid_softc),
                   M_RAIDFRAME, M_NOWAIT);
        if (raid_softc == NULL) {
                printf("WARNING: no memory for RAIDframe driver\n");
                return;
        }
        numraid = num;
        bzero(raid_softc, num * sizeof(struct raid_softc));

        for (raidID = 0; raidID < num; raidID++) {
                RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
                          (RF_Raid_t *));
                if (raidPtrs[raidID] == NULL) {
                        printf("raidPtrs[%d] is NULL\n", raidID);
                }
        }
}


int
raidsize(dev)
        dev_t   dev;
{
        struct raid_softc *rs;
        struct disklabel *lp;
        int     part, unit, omask, size;

        unit = raidunit(dev);
        if (unit >= numraid)
                return (-1);
        rs = &raid_softc[unit];

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return (-1);

        part = DISKPART(dev);
        omask = rs->sc_dkdev.dk_openmask & (1 << part);
        lp = rs->sc_dkdev.dk_label;

        if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
                return (-1);

        if (lp->d_partitions[part].p_fstype != FS_SWAP)
                size = -1;
        else
                size = lp->d_partitions[part].p_size *
                    (lp->d_secsize / DEV_BSIZE);

        if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
                return (-1);

        return (size);

}

int
raiddump(dev, blkno, va, size)
        dev_t   dev;
        daddr_t blkno;
        caddr_t va;
        size_t  size;
{
        /* Not implemented. */
        return ENXIO;
}
/* ARGSUSED */
int
raidopen(dev, flags, fmt, p)
        dev_t   dev;
        int     flags, fmt;
        struct proc *p;
{
        int     unit = raidunit(dev);
        struct raid_softc *rs;
        struct disklabel *lp;
        int     part, pmask;
        int     error = 0;

        if (unit >= numraid)
                return (ENXIO);
        rs = &raid_softc[unit];

        if ((error = raidlock(rs)) != 0)
                return (error);
        lp = rs->sc_dkdev.dk_label;

        part = DISKPART(dev);
        pmask = (1 << part);

        db1_printf(("Opening raid device number: %d partition: %d\n",
                unit, part));


        if ((rs->sc_flags & RAIDF_INITED) &&
            (rs->sc_dkdev.dk_openmask == 0))
                raidgetdisklabel(dev);

        /* make sure that this partition exists */

        if (part != RAW_PART) {
                db1_printf(("Not a raw partition..\n"));
                if (((rs->sc_flags & RAIDF_INITED) == 0) ||
                    ((part >= lp->d_npartitions) ||
                        (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
                        error = ENXIO;
                        raidunlock(rs);
                        db1_printf(("Bailing out...\n"));
                        return (error);
                }
        }
        /* Prevent this unit from being unconfigured while open. */
        switch (fmt) {
        case S_IFCHR:
                rs->sc_dkdev.dk_copenmask |= pmask;
                break;

        case S_IFBLK:
                rs->sc_dkdev.dk_bopenmask |= pmask;
                break;
        }

        if ((rs->sc_dkdev.dk_openmask == 0) &&
            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                /* First one... mark things as dirty... Note that we *MUST*
                   have done a configure before this.  I DO NOT WANT TO BE
                   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
                   THAT THEY BELONG TOGETHER!!!!! */
                /* XXX should check to see if we're only open for reading
                   here... If so, we needn't do this, but then need some
                   other way of keeping track of what's happened.. */

                rf_markalldirty( raidPtrs[unit] );
        }


        rs->sc_dkdev.dk_openmask =
            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

        raidunlock(rs);

        return (error);


}
/* ARGSUSED */
int
raidclose(dev, flags, fmt, p)
        dev_t   dev;
        int     flags, fmt;
        struct proc *p;
{
        int     unit = raidunit(dev);
        struct raid_softc *rs;
        int     error = 0;
        int     part;

        if (unit >= numraid)
                return (ENXIO);
        rs = &raid_softc[unit];

        if ((error = raidlock(rs)) != 0)
                return (error);

        part = DISKPART(dev);

        /* ...that much closer to allowing unconfiguration... */
        switch (fmt) {
        case S_IFCHR:
                rs->sc_dkdev.dk_copenmask &= ~(1 << part);
                break;

        case S_IFBLK:
                rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
                break;
        }
        rs->sc_dkdev.dk_openmask =
            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

        if ((rs->sc_dkdev.dk_openmask == 0) &&
            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                /* Last one... device is not unconfigured yet.
                   Device shutdown has taken care of setting the
                   clean bits if RAIDF_INITED is not set
                   mark things as clean... */
                rf_update_component_labels( raidPtrs[unit] );
        }

        raidunlock(rs);
        return (0);

}

void
raidstrategy(bp)
        register struct buf *bp;
{
        register int s;

        unsigned int raidID = raidunit(bp->b_dev);
        RF_Raid_t *raidPtr;
        struct raid_softc *rs = &raid_softc[raidID];
        struct disklabel *lp;
        int     wlabel;

#if 0
        db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
556 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
557 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
558 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
559 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
560
561 if (bp->b_flags & B_READ)
562 db1_printf(("READ\n"));
563 else
564 db1_printf(("WRITE\n"));
565 #endif
        if ((rs->sc_flags & RAIDF_INITED) == 0) {
                bp->b_error = ENXIO;
                bp->b_flags |= B_ERROR;
                bp->b_resid = bp->b_bcount;
                biodone(bp);
                return;
        }
        if (raidID >= numraid || !raidPtrs[raidID]) {
                bp->b_error = ENODEV;
                bp->b_flags |= B_ERROR;
                bp->b_resid = bp->b_bcount;
                biodone(bp);
                return;
        }
        raidPtr = raidPtrs[raidID];
        if (!raidPtr->valid) {
                bp->b_error = ENODEV;
                bp->b_flags |= B_ERROR;
                bp->b_resid = bp->b_bcount;
                biodone(bp);
                return;
        }
        if (bp->b_bcount == 0) {
                db1_printf(("b_bcount is zero..\n"));
                biodone(bp);
                return;
        }
        lp = rs->sc_dkdev.dk_label;

        /*
         * Do bounds checking and adjust transfer.  If there's an
         * error, the bounds check will flag that for us.
         */

        wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
        if (DISKPART(bp->b_dev) != RAW_PART)
                if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
                        db1_printf(("Bounds check failed!!:%d %d\n",
                                (int) bp->b_blkno, (int) wlabel));
                        biodone(bp);
                        return;
                }
        s = splbio();           /* XXX Needed? */
        db1_printf(("Beginning strategy...\n"));

        bp->b_resid = 0;
        bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
            NULL, NULL, NULL);
        if (bp->b_error) {
                bp->b_flags |= B_ERROR;
                db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
                        bp->b_error));
        }
        splx(s);
#if 0
        db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
                bp, bp->b_data,
                (int) bp->b_bcount, (int) bp->b_resid));
#endif
}
/* ARGSUSED */
int
raidread(dev, uio, flags)
        dev_t   dev;
        struct uio *uio;
        int     flags;
{
        int     unit = raidunit(dev);
        struct raid_softc *rs;
        int     part;

        if (unit >= numraid)
                return (ENXIO);
        rs = &raid_softc[unit];

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return (ENXIO);
        part = DISKPART(dev);

        db1_printf(("raidread: unit: %d partition: %d\n", unit, part));

        return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}
/* ARGSUSED */
int
raidwrite(dev, uio, flags)
        dev_t   dev;
        struct uio *uio;
        int     flags;
{
        int     unit = raidunit(dev);
        struct raid_softc *rs;

        if (unit >= numraid)
                return (ENXIO);
        rs = &raid_softc[unit];

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return (ENXIO);
        db1_printf(("raidwrite\n"));
        return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

int
raidioctl(dev, cmd, data, flag, p)
        dev_t   dev;
        u_long  cmd;
        caddr_t data;
        int     flag;
        struct proc *p;
{
        int     unit = raidunit(dev);
        int     error = 0;
        int     part, pmask;
        struct raid_softc *rs;
        RF_Config_t *k_cfg, *u_cfg;
        u_char *specific_buf;
        int     retcode = 0;
        int     row;
        int     column;
        int     s;
        struct rf_recon_req *rrcopy, *rr;
        RF_ComponentLabel_t *component_label;
        RF_ComponentLabel_t ci_label;
        RF_ComponentLabel_t **c_label_ptr;
        RF_SingleComponent_t *sparePtr,*componentPtr;
        RF_SingleComponent_t hot_spare;
        RF_SingleComponent_t component;

        if (unit >= numraid)
                return (ENXIO);
        rs = &raid_softc[unit];

        db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
                (int) DISKPART(dev), (int) unit, (int) cmd));

        /* Must be open for writes for these commands... */
        switch (cmd) {
        case DIOCSDINFO:
        case DIOCWDINFO:
        case DIOCWLABEL:
                if ((flag & FWRITE) == 0)
                        return (EBADF);
        }

        /* Must be initialized for these... */
        switch (cmd) {
        case DIOCGDINFO:
        case DIOCSDINFO:
        case DIOCWDINFO:
        case DIOCGPART:
        case DIOCWLABEL:
        case DIOCGDEFLABEL:
        case RAIDFRAME_SHUTDOWN:
        case RAIDFRAME_REWRITEPARITY:
        case RAIDFRAME_GET_INFO:
        case RAIDFRAME_RESET_ACCTOTALS:
        case RAIDFRAME_GET_ACCTOTALS:
        case RAIDFRAME_KEEP_ACCTOTALS:
        case RAIDFRAME_GET_SIZE:
        case RAIDFRAME_FAIL_DISK:
        case RAIDFRAME_COPYBACK:
        case RAIDFRAME_CHECKRECON:
        case RAIDFRAME_GET_COMPONENT_LABEL:
        case RAIDFRAME_SET_COMPONENT_LABEL:
        case RAIDFRAME_ADD_HOT_SPARE:
        case RAIDFRAME_REMOVE_HOT_SPARE:
        case RAIDFRAME_INIT_LABELS:
        case RAIDFRAME_REBUILD_IN_PLACE:
        case RAIDFRAME_CHECK_PARITY:
                if ((rs->sc_flags & RAIDF_INITED) == 0)
                        return (ENXIO);
        }

        switch (cmd) {


                /* configure the system */
        case RAIDFRAME_CONFIGURE:

                db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
                /* copy-in the configuration information */
                /* data points to a pointer to the configuration structure */
                u_cfg = *((RF_Config_t **) data);
                RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
                if (k_cfg == NULL) {
                        db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
                        return (ENOMEM);
                }
                retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
                    sizeof(RF_Config_t));
                if (retcode) {
                        RF_Free(k_cfg, sizeof(RF_Config_t));
                        db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
                                retcode));
                        return (retcode);
                }
                /* allocate a buffer for the layout-specific data, and copy it
                 * in */
                if (k_cfg->layoutSpecificSize) {
                        if (k_cfg->layoutSpecificSize > 10000) {
                                /* sanity check */
                                RF_Free(k_cfg, sizeof(RF_Config_t));
                                db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
                                return (EINVAL);
                        }
                        RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
                            (u_char *));
                        if (specific_buf == NULL) {
                                RF_Free(k_cfg, sizeof(RF_Config_t));
                                db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
                                return (ENOMEM);
                        }
                        retcode = copyin(k_cfg->layoutSpecific,
                            (caddr_t) specific_buf,
                            k_cfg->layoutSpecificSize);
                        if (retcode) {
                                RF_Free(specific_buf,
                                    k_cfg->layoutSpecificSize);
                                RF_Free(k_cfg, sizeof(RF_Config_t));
                                db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
                                        retcode));
                                return (retcode);
                        }
                } else
                        specific_buf = NULL;
                k_cfg->layoutSpecific = specific_buf;

                /* should do some kind of sanity check on the configuration.
                 * Store the sum of all the bytes in the last byte? */

                /* configure the system */

                raidPtrs[unit]->raidid = unit;

                retcode = rf_Configure(raidPtrs[unit], k_cfg);

                /* allow this many simultaneous IO's to this RAID device */
                raidPtrs[unit]->openings = RAIDOUTSTANDING;

                if (retcode == 0) {
                        retcode = raidinit(dev, raidPtrs[unit], unit);
                        rf_markalldirty( raidPtrs[unit] );
                }
                /* free the buffers.  No return code here. */
                if (k_cfg->layoutSpecificSize) {
                        RF_Free(specific_buf, k_cfg->layoutSpecificSize);
                }
                RF_Free(k_cfg, sizeof(RF_Config_t));

                db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
                        retcode));

                return (retcode);

                /* shutdown the system */
        case RAIDFRAME_SHUTDOWN:

                if ((error = raidlock(rs)) != 0)
                        return (error);

                /*
                 * If somebody has a partition mounted, we shouldn't
                 * shutdown.
                 */

                part = DISKPART(dev);
                pmask = (1 << part);
                if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
                    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
                        (rs->sc_dkdev.dk_copenmask & pmask))) {
                        raidunlock(rs);
                        return (EBUSY);
                }

                if (rf_debugKernelAccess) {
                        printf("call shutdown\n");
                }

                retcode = rf_Shutdown(raidPtrs[unit]);

                db1_printf(("Done main shutdown\n"));

                pool_destroy(&rs->sc_cbufpool);
                db1_printf(("Done freeing component buffer freelist\n"));

                /* It's no longer initialized... */
                rs->sc_flags &= ~RAIDF_INITED;

                /* Detach the disk. */
                disk_detach(&rs->sc_dkdev);

                raidunlock(rs);

                return (retcode);
        case RAIDFRAME_GET_COMPONENT_LABEL:
                c_label_ptr = (RF_ComponentLabel_t **) data;
                /* need to read the component label for the disk indicated
                   by row,column in component_label
                   XXX need to sanity check these values!!!
                   */

                /* For practice, let's get it directly from disk, rather
                   than from the in-core copy */
                RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
                           (RF_ComponentLabel_t *));
                if (component_label == NULL)
                        return (ENOMEM);

                bzero((char *) component_label, sizeof(RF_ComponentLabel_t));

                retcode = copyin( *c_label_ptr, component_label,
                                  sizeof(RF_ComponentLabel_t));

                if (retcode) {
                        RF_Free( component_label, sizeof(RF_ComponentLabel_t));
                        return(retcode);
                }

                row = component_label->row;
                column = component_label->column;

                if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
                    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
                        RF_Free( component_label, sizeof(RF_ComponentLabel_t));
                        return(EINVAL);
                }

                raidread_component_label(
                        raidPtrs[unit]->Disks[row][column].dev,
                        raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
                        component_label );

                retcode = copyout((caddr_t) component_label,
                                  (caddr_t) *c_label_ptr,
                                  sizeof(RF_ComponentLabel_t));
                RF_Free( component_label, sizeof(RF_ComponentLabel_t));
                return (retcode);

        case RAIDFRAME_SET_COMPONENT_LABEL:
                component_label = (RF_ComponentLabel_t *) data;

                /* XXX check the label for valid stuff... */
                /* Note that some things *should not* get modified --
                   the user should be re-initing the labels instead of
                   trying to patch things.
                   */

                printf("Got component label:\n");
                printf("Version: %d\n",component_label->version);
                printf("Serial Number: %d\n",component_label->serial_number);
                printf("Mod counter: %d\n",component_label->mod_counter);
                printf("Row: %d\n", component_label->row);
                printf("Column: %d\n", component_label->column);
                printf("Num Rows: %d\n", component_label->num_rows);
                printf("Num Columns: %d\n", component_label->num_columns);
                printf("Clean: %d\n", component_label->clean);
                printf("Status: %d\n", component_label->status);

                row = component_label->row;
                column = component_label->column;

                if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
                    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
                        return(EINVAL);
                }

                /* XXX this isn't allowed to do anything for now :-) */
#if 0
                raidwrite_component_label(
                        raidPtrs[unit]->Disks[row][column].dev,
                        raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
                        component_label );
#endif
                return (0);

        case RAIDFRAME_INIT_LABELS:
                component_label = (RF_ComponentLabel_t *) data;
                /*
                   we only want the serial number from
                   the above.  We get all the rest of the information
                   from the config that was used to create this RAID
                   set.
                   */

                raidPtrs[unit]->serial_number = component_label->serial_number;
                /* current version number */
                ci_label.version = RF_COMPONENT_LABEL_VERSION;
                ci_label.serial_number = component_label->serial_number;
                ci_label.mod_counter = raidPtrs[unit]->mod_counter;
                ci_label.num_rows = raidPtrs[unit]->numRow;
                ci_label.num_columns = raidPtrs[unit]->numCol;
                ci_label.clean = RF_RAID_DIRTY;         /* not clean */
                ci_label.status = rf_ds_optimal;        /* "It's good!" */

                for(row=0;row<raidPtrs[unit]->numRow;row++) {
                        ci_label.row = row;
                        for(column=0;column<raidPtrs[unit]->numCol;column++) {
                                ci_label.column = column;
                                raidwrite_component_label(
                                  raidPtrs[unit]->Disks[row][column].dev,
                                  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
                                  &ci_label );
                        }
                }

                return (retcode);

                /* initialize all parity */
        case RAIDFRAME_REWRITEPARITY:

                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                        /* Parity for RAID 0 is trivially correct */
                        raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
                        return(0);
                }

                /* borrow the thread of the requesting process */

                s = splbio();
                retcode = rf_RewriteParity(raidPtrs[unit]);
                splx(s);
                /* return I/O Error if the parity rewrite fails */

                if (retcode) {
                        retcode = EIO;
                } else {
                        /* set the clean bit!  If we shutdown correctly,
                           the clean bit on each component label will get
                           set */
                        raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
                }
                return (retcode);


        case RAIDFRAME_ADD_HOT_SPARE:
                sparePtr = (RF_SingleComponent_t *) data;
                memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
                printf("Adding spare\n");
                retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
                return(retcode);

        case RAIDFRAME_REMOVE_HOT_SPARE:
                return(retcode);

        case RAIDFRAME_REBUILD_IN_PLACE:

                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                        /* Can't do this on a RAID 0!! */
                        return(EINVAL);
                }

                componentPtr = (RF_SingleComponent_t *) data;
                memcpy( &component, componentPtr,
                        sizeof(RF_SingleComponent_t));
                row = component.row;
                column = component.column;
                printf("Rebuild: %d %d\n",row, column);
                if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
                    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
                        return(EINVAL);
                }
                printf("Attempting a rebuild in place\n");
                s = splbio();
                retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
                splx(s);
                return(retcode);

        case RAIDFRAME_GET_INFO:
                {
                        RF_Raid_t *raid = raidPtrs[unit];
                        RF_DeviceConfig_t *cfg, **ucfgp;
                        int     i, j, d;

                        if (!raid->valid)
                                return (ENODEV);
                        ucfgp = (RF_DeviceConfig_t **) data;
                        RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
                                  (RF_DeviceConfig_t *));
                        if (cfg == NULL)
                                return (ENOMEM);
                        bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
                        cfg->rows = raid->numRow;
                        cfg->cols = raid->numCol;
                        cfg->ndevs = raid->numRow * raid->numCol;
                        if (cfg->ndevs >= RF_MAX_DISKS) {
                                cfg->ndevs = 0;
                                RF_Free(cfg, sizeof(RF_DeviceConfig_t));
                                return (ENOMEM);
                        }
                        cfg->nspares = raid->numSpare;
                        if (cfg->nspares >= RF_MAX_DISKS) {
                                cfg->nspares = 0;
                                RF_Free(cfg, sizeof(RF_DeviceConfig_t));
                                return (ENOMEM);
                        }
                        cfg->maxqdepth = raid->maxQueueDepth;
                        d = 0;
                        for (i = 0; i < cfg->rows; i++) {
                                for (j = 0; j < cfg->cols; j++) {
                                        cfg->devs[d] = raid->Disks[i][j];
                                        d++;
                                }
                        }
                        for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
                                cfg->spares[i] = raid->Disks[0][j];
                        }
                        retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
                                          sizeof(RF_DeviceConfig_t));
                        RF_Free(cfg, sizeof(RF_DeviceConfig_t));

                        return (retcode);
                }
                break;
        case RAIDFRAME_CHECK_PARITY:
                *(int *) data = raidPtrs[unit]->parity_good;
                return (0);
        case RAIDFRAME_RESET_ACCTOTALS:
                {
                        RF_Raid_t *raid = raidPtrs[unit];

                        bzero(&raid->acc_totals, sizeof(raid->acc_totals));
                        return (0);
                }
                break;

        case RAIDFRAME_GET_ACCTOTALS:
                {
                        RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
                        RF_Raid_t *raid = raidPtrs[unit];

                        *totals = raid->acc_totals;
                        return (0);
                }
                break;

        case RAIDFRAME_KEEP_ACCTOTALS:
                {
                        RF_Raid_t *raid = raidPtrs[unit];
                        int    *keep = (int *) data;

                        raid->keep_acc_totals = *keep;
                        return (0);
                }
                break;

        case RAIDFRAME_GET_SIZE:
                *(int *) data = raidPtrs[unit]->totalSectors;
                return (0);

#define RAIDFRAME_RECON 1
                /* XXX The above should probably be set somewhere else!! GO */
#if RAIDFRAME_RECON > 0

                /* fail a disk & optionally start reconstruction */
        case RAIDFRAME_FAIL_DISK:

                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                        /* Can't do this on a RAID 0!! */
                        return(EINVAL);
                }

                rr = (struct rf_recon_req *) data;

                if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
                    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
                        return (EINVAL);

                printf("raid%d: Failing the disk: row: %d col: %d\n",
                    unit, rr->row, rr->col);

                /* make a copy of the recon request so that we don't rely on
                 * the user's buffer */
                RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
                bcopy(rr, rrcopy, sizeof(*rr));
                rrcopy->raidPtr = (void *) raidPtrs[unit];

                LOCK_RECON_Q_MUTEX();
                rrcopy->next = recon_queue;
                recon_queue = rrcopy;
                wakeup(&recon_queue);
                UNLOCK_RECON_Q_MUTEX();

                return (0);

                /* invoke a copyback operation after recon on whatever disk
                 * needs it, if any */
        case RAIDFRAME_COPYBACK:

                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                        /* This makes no sense on a RAID 0!! */
                        return(EINVAL);
                }

                /* borrow the current thread to get this done */

                s = splbio();
                rf_CopybackReconstructedData(raidPtrs[unit]);
                splx(s);
                return (0);

                /* return the percentage completion of reconstruction */
        case RAIDFRAME_CHECKRECON:
                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                        /* This makes no sense on a RAID 0 */
                        return(EINVAL);
                }

                row = *(int *) data;
                if (row < 0 || row >= raidPtrs[unit]->numRow)
                        return (EINVAL);
                if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
                        *(int *) data = 100;
                else
                        *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
                return (0);

                /* the sparetable daemon calls this to wait for the kernel to
                 * need a spare table. this ioctl does not return until a
                 * spare table is needed. XXX -- calling mpsleep here in the
                 * ioctl code is almost certainly wrong and evil. -- XXX XXX
                 * -- I should either compute the spare table in the kernel,
                 * or have a different -- XXX XXX -- interface (a different
                 * character device) for delivering the table     -- XXX */
#if 0
        case RAIDFRAME_SPARET_WAIT:
                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                while (!rf_sparet_wait_queue)
                        mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
                waitreq = rf_sparet_wait_queue;
                rf_sparet_wait_queue = rf_sparet_wait_queue->next;
                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

                *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */

                RF_Free(waitreq, sizeof(*waitreq));
                return (0);

                /* wakes up a process waiting on SPARET_WAIT and puts an error
                 * code in it that will cause the daemon to exit */
        case RAIDFRAME_ABORT_SPARET_WAIT:
                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                waitreq->fcol = -1;
                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                waitreq->next = rf_sparet_wait_queue;
                rf_sparet_wait_queue = waitreq;
                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
                wakeup(&rf_sparet_wait_queue);
                return (0);

                /* used by the spare table daemon to deliver a spare table
                 * into the kernel */
        case RAIDFRAME_SEND_SPARET:

                /* install the spare table */
                retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);

                /* respond to the requestor.  the return status of the spare
                 * table installation is passed in the "fcol" field */
                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                waitreq->fcol = retcode;
                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                waitreq->next = rf_sparet_resp_queue;
                rf_sparet_resp_queue = waitreq;
                wakeup(&rf_sparet_resp_queue);
                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

                return (retcode);
#endif


#endif                          /* RAIDFRAME_RECON > 0 */

        default:
                break;          /* fall through to the os-specific code below */

        }

        if (!raidPtrs[unit]->valid)
                return (EINVAL);

        /*
         * Add support for "regular" device ioctls here.
         */

        switch (cmd) {
        case DIOCGDINFO:
                db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
                *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
                break;

        case DIOCGPART:
                db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
                ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
                ((struct partinfo *) data)->part =
                    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
                break;

        case DIOCWDINFO:
                db1_printf(("DIOCWDINFO\n"));
        case DIOCSDINFO:
                db1_printf(("DIOCSDINFO\n"));
                if ((error = raidlock(rs)) != 0)
                        return (error);

                rs->sc_flags |= RAIDF_LABELLING;

                error = setdisklabel(rs->sc_dkdev.dk_label,
                    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
                if (error == 0) {
                        if (cmd == DIOCWDINFO)
                                error = writedisklabel(RAIDLABELDEV(dev),
                                    raidstrategy, rs->sc_dkdev.dk_label,
                                    rs->sc_dkdev.dk_cpulabel);
                }
                rs->sc_flags &= ~RAIDF_LABELLING;

                raidunlock(rs);

                if (error)
                        return (error);
                break;

        case DIOCWLABEL:
                db1_printf(("DIOCWLABEL\n"));
                if (*(int *) data != 0)
                        rs->sc_flags |= RAIDF_WLABEL;
                else
                        rs->sc_flags &= ~RAIDF_WLABEL;
                break;

        case DIOCGDEFLABEL:
                db1_printf(("DIOCGDEFLABEL\n"));
                raidgetdefaultlabel(raidPtrs[unit], rs,
                    (struct disklabel *) data);
                break;

        default:
                retcode = ENOTTY;       /* XXXX ?? OR EINVAL ? */
        }
        return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static int
raidinit(dev, raidPtr, unit)
        dev_t   dev;
        RF_Raid_t *raidPtr;
        int     unit;
{
        int     retcode;
        /* int ix; */
        /* struct raidbuf *raidbp; */
        struct raid_softc *rs;

        retcode = 0;

        rs = &raid_softc[unit];
        pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
                  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


        /* XXX should check return code first... */
        rs->sc_flags |= RAIDF_INITED;

        sprintf(rs->sc_xname, "raid%d", unit);  /* XXX doesn't check bounds. */

        rs->sc_dkdev.dk_name = rs->sc_xname;

        /* disk_attach actually creates space for the CPU disklabel, among
         * other things, so it's critical to call this *BEFORE* we try putzing
         * with disklabels. */

        disk_attach(&rs->sc_dkdev);

        /* XXX There may be a weird interaction here between this, and
         * protectedSectors, as used in RAIDframe.  */

        rs->sc_size = raidPtr->totalSectors;
        rs->sc_dev = dev;

        return (retcode);
}

/*
 * This kernel thread never exits.  It is created once, and persists
 * until the system reboots.
 */

void
rf_ReconKernelThread()
{
        struct rf_recon_req *req;
        int     s;

        /* XXX not sure what spl() level we should be at here... probably
         * splbio() */
        s = splbio();

        while (1) {
                /* grab the next reconstruction request from the queue */
                LOCK_RECON_Q_MUTEX();
                while (!recon_queue) {
                        UNLOCK_RECON_Q_MUTEX();
                        tsleep(&recon_queue, PRIBIO,
                               "raidframe recon", 0);
                        LOCK_RECON_Q_MUTEX();
                }
                req = recon_queue;
                recon_queue = recon_queue->next;
                UNLOCK_RECON_Q_MUTEX();

                /*
                 * If flags specifies that we should start recon, this call
                 * will not return until reconstruction completes, fails,
                 * or is aborted.
                 */
                rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
                    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

                RF_Free(req, sizeof(*req));
        }
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
        RF_SparetWait_t *req;
{
        int     retcode;

        RF_LOCK_MUTEX(rf_sparet_wait_mutex);
        req->next = rf_sparet_wait_queue;
        rf_sparet_wait_queue = req;
        wakeup(&rf_sparet_wait_queue);

        /* mpsleep unlocks the mutex */
        while (!rf_sparet_resp_queue) {
                tsleep(&rf_sparet_resp_queue, PRIBIO,
                       "raidframe getsparetable", 0);
        }
        req = rf_sparet_resp_queue;
        rf_sparet_resp_queue = req->next;
        RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

        retcode = req->fcol;
        RF_Free(req, sizeof(*req));     /* this is not the same req as we
                                         * alloc'd */
        return (retcode);
}
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
        RF_Raid_t *raidPtr;
        struct buf *bp;
        RF_RaidAccessFlags_t flags;
        void    (*cbFunc) (struct buf *);
        void   *cbArg;
{
        RF_SectorCount_t num_blocks, pb, sum;
        RF_RaidAddr_t raid_addr;
        int     retcode;
        struct partition *pp;
        daddr_t blocknum;
        int     unit;
        struct raid_softc *rs;
        int     do_async;

        /* XXX The dev_t used here should be for /dev/[r]raid* !!! */

        unit = raidPtr->raidid;
        rs = &raid_softc[unit];

        /* Ok, for the bp we have here, bp->b_blkno is relative to the
         * partition.. Need to make it absolute to the underlying device.. */

        blocknum = bp->b_blkno;
        if (DISKPART(bp->b_dev) != RAW_PART) {
                pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
                blocknum += pp->p_offset;
                db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
                        pp->p_offset));
        } else {
                db1_printf(("Is raw..\n"));
        }
        db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

        db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
        db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

        /* *THIS* is where we adjust what block we're going to... but DO NOT
         * TOUCH bp->b_blkno!!! */
        raid_addr = blocknum;

        num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
        pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
        sum = raid_addr + num_blocks + pb;
        if (1 || rf_debugKernelAccess) {
                db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
                        (int) raid_addr, (int) sum, (int) num_blocks,
                        (int) pb, (int) bp->b_resid));
        }
        if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
            || (sum < num_blocks) || (sum < pb)) {
                bp->b_error = ENOSPC;
                bp->b_flags |= B_ERROR;
                bp->b_resid = bp->b_bcount;
                biodone(bp);
                return (bp->b_error);
        }
        /*
         * XXX rf_DoAccess() should do this, not just DoAccessKernel()
         */

        if (bp->b_bcount & raidPtr->sectorMask) {
                bp->b_error = EINVAL;
                bp->b_flags |= B_ERROR;
                bp->b_resid = bp->b_bcount;
                biodone(bp);
                return (bp->b_error);
        }
        db1_printf(("Calling DoAccess..\n"));

        /* Put a throttle on the number of requests we handle simultaneously */

        RF_LOCK_MUTEX(raidPtr->mutex);

        while(raidPtr->openings <= 0) {
                RF_UNLOCK_MUTEX(raidPtr->mutex);
                (void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
                RF_LOCK_MUTEX(raidPtr->mutex);
        }
        raidPtr->openings--;

        RF_UNLOCK_MUTEX(raidPtr->mutex);

        /*
         * Everything is async.
         */
        do_async = 1;

        /* don't ever condition on bp->b_flags & B_WRITE.  always condition on
         * B_READ instead */
        retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
                              RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
                              do_async, raid_addr, num_blocks,
                              bp->b_un.b_addr,
                              bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
                              NULL, cbFunc, cbArg);
#if 0
        db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
                bp->b_data, (int) bp->b_resid));
#endif

        return (retcode);
}
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(queue, req)
        RF_DiskQueue_t *queue;
        RF_DiskQueueData_t *req;
{
        int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
        struct buf *bp;
        struct raidbuf *raidbp = NULL;
        struct raid_softc *rs;
        int     unit;

        /* XXX along with the vnode, we also need the softc associated with
         * this device.. */

        req->queue = queue;

        unit = queue->raidPtr->raidid;

        db1_printf(("DispatchKernelIO unit: %d\n", unit));

        if (unit >= numraid) {
                printf("Invalid unit number: %d %d\n", unit, numraid);
                panic("Invalid Unit number in rf_DispatchKernelIO\n");
        }
        rs = &raid_softc[unit];

        /* XXX is this the right place? */
        disk_busy(&rs->sc_dkdev);

        bp = req->bp;
#if 1
        /* XXX when there is a physical disk failure, someone is passing us a
         * buffer that contains old stuff!!  Attempt to deal with this problem
         * without taking a performance hit... (not sure where the real bug
         * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

        if (bp->b_flags & B_ERROR) {
                bp->b_flags &= ~B_ERROR;
        }
        if (bp->b_error != 0) {
                bp->b_error = 0;
        }
#endif
        raidbp = RAIDGETBUF(rs);

        raidbp->rf_flags = 0;   /* XXX not really used anywhere... */

        /*
         * context for raidiodone
         */
        raidbp->rf_obp = bp;
        raidbp->req = req;

        switch (req->type) {
        case RF_IO_TYPE_NOP:    /* used primarily to unlock a locked queue */
                /* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
                 * queue->row, queue->col); */
                /* XXX need to do something extra here.. */
                /* I'm leaving this in, as I've never actually seen it used,
                 * and I'd like folks to report it... GO */
                printf("WAKEUP CALLED\n");
                queue->numOutstanding++;

                /* XXX need to glue the original buffer into this?? */

                KernelWakeupFunc(&raidbp->rf_buf);
                break;

        case RF_IO_TYPE_READ:
        case RF_IO_TYPE_WRITE:

                if (req->tracerec) {
                        RF_ETIMER_START(req->tracerec->timer);
                }
                InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
                    op | bp->b_flags, queue->rf_cinfo->ci_dev,
                    req->sectorOffset, req->numSector,
                    req->buf, KernelWakeupFunc, (void *) req,
                    queue->raidPtr->logBytesPerSector, req->b_proc);

                if (rf_debugKernelAccess) {
                        db1_printf(("dispatch: bp->b_blkno = %ld\n",
                                (long) bp->b_blkno));
                }
                queue->numOutstanding++;
                queue->last_deq_sector = req->sectorOffset;
                /* acc wouldn't have been let in if there were any pending
                 * reqs at any other priority */
                queue->curPriority = req->priority;
                /* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
                 * req->type, queue->row, queue->col); */

                db1_printf(("Going for %c to unit %d row %d col %d\n",
                        req->type, unit, queue->row, queue->col));
                db1_printf(("sector %d count %d (%d bytes) %d\n",
                        (int) req->sectorOffset, (int) req->numSector,
                        (int) (req->numSector <<
                            queue->raidPtr->logBytesPerSector),
                        (int) queue->raidPtr->logBytesPerSector));
                if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
                        raidbp->rf_buf.b_vp->v_numoutput++;
                }
                VOP_STRATEGY(&raidbp->rf_buf);

                break;

        default:
                panic("bad req->type in rf_DispatchKernelIO");
        }
        db1_printf(("Exiting from DispatchKernelIO\n"));
        return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(vbp)
        struct buf *vbp;
{
        RF_DiskQueueData_t *req = NULL;
        RF_DiskQueue_t *queue;
        struct raidbuf *raidbp = (struct raidbuf *) vbp;
        struct buf *bp;
        struct raid_softc *rs;
        int     unit;
        register int s;

        s = splbio();           /* XXX */
        db1_printf(("recovering the request queue:\n"));
        req = raidbp->req;

        bp = raidbp->rf_obp;
#if 0
        db1_printf(("bp=0x%x\n", bp));
#endif

        queue = (RF_DiskQueue_t *) req->queue;

        if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
                printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
                bp->b_flags |= B_ERROR;
                bp->b_error = raidbp->rf_buf.b_error ?
                    raidbp->rf_buf.b_error : EIO;
        }
#if 0
        db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
        db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
        db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
        db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

        /* XXX methinks this could be wrong... */
#if 1
        bp->b_resid = raidbp->rf_buf.b_resid;
#endif

        if (req->tracerec) {
                RF_ETIMER_STOP(req->tracerec->timer);
                RF_ETIMER_EVAL(req->tracerec->timer);
                RF_LOCK_MUTEX(rf_tracing_mutex);
                req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                req->tracerec->num_phys_ios++;
                RF_UNLOCK_MUTEX(rf_tracing_mutex);
        }
        bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */

        unit = queue->raidPtr->raidid;  /* *Much* simpler :-> */


        /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
         * ballistic, and mark the component as hosed... */
#if 1
        if (bp->b_flags & B_ERROR) {
                /* Mark the disk as dead */
                /* but only mark it once... */
                if (queue->raidPtr->Disks[queue->row][queue->col].status ==
                    rf_ds_optimal) {
                        printf("raid%d: IO Error.  Marking %s as failed.\n",
                            unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
                        queue->raidPtr->Disks[queue->row][queue->col].status =
                            rf_ds_failed;
                        queue->raidPtr->status[queue->row] = rf_rs_degraded;
                        queue->raidPtr->numFailures++;
                        /* XXX here we should bump the version number for each component, and write that data out */
                } else {        /* Disk is already dead... */
                        /* printf("Disk already marked as dead!\n"); */
                }

        }
#endif

        rs = &raid_softc[unit];
        RAIDPUTBUF(rs, raidbp);


        if (bp->b_resid == 0) {
                db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
                        unit, bp->b_resid, bp->b_bcount));
                /* XXX is this the right place for a disk_unbusy()??!??!?!? */
                disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
        } else {
                db1_printf(("b_resid is still %ld\n", bp->b_resid));
        }

        rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
        (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
        /* printf("Exiting KernelWakeupFunc\n"); */

        splx(s);                /* XXX */
}



/*
 * initialize a buf structure for doing an I/O in the kernel.
 */
static void
InitBP(
    struct buf * bp,
    struct vnode * b_vp,
    unsigned rw_flag,
    dev_t dev,
    RF_SectorNum_t startSect,
    RF_SectorCount_t numSect,
    caddr_t buf,
    void (*cbFunc) (struct buf *),
    void *cbArg,
    int logBytesPerSector,
    struct proc * b_proc)
{
        /* bp->b_flags = B_PHYS | rw_flag; */
        bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
        bp->b_bcount = numSect << logBytesPerSector;
        bp->b_bufsize = bp->b_bcount;
        bp->b_error = 0;
        bp->b_dev = dev;
        db1_printf(("bp->b_dev is %d\n", dev));
        bp->b_un.b_addr = buf;
#if 0
        db1_printf(("bp->b_data=0x%x\n", bp->b_data));
#endif

        bp->b_blkno = startSect;
        bp->b_resid = bp->b_bcount;     /* XXX is this right!??!?!! */
        db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
        if (bp->b_bcount == 0) {
                panic("bp->b_bcount is zero in InitBP!!\n");
        }
        bp->b_proc = b_proc;
        bp->b_iodone = cbFunc;
        bp->b_vp = b_vp;

}

static void
raidgetdefaultlabel(raidPtr, rs, lp)
        RF_Raid_t *raidPtr;
        struct raid_softc *rs;
        struct disklabel *lp;
{
        db1_printf(("Building a default label...\n"));
        bzero(lp, sizeof(*lp));

        /* fabricate a label... */
        lp->d_secperunit = raidPtr->totalSectors;
        lp->d_secsize = raidPtr->bytesPerSector;
        lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
        lp->d_ntracks = 1;
        lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
        lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

        strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
        lp->d_type = DTYPE_RAID;
        strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
        lp->d_rpm = 3600;
        lp->d_interleave = 1;
        lp->d_flags = 0;

        lp->d_partitions[RAW_PART].p_offset = 0;
        lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
        lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
        lp->d_npartitions = RAW_PART + 1;

        lp->d_magic = DISKMAGIC;
        lp->d_magic2 = DISKMAGIC;
        lp->d_checksum = dkcksum(lp);

}
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev)
        dev_t   dev;
{
        int     unit = raidunit(dev);
        struct raid_softc *rs = &raid_softc[unit];
        char   *errstring;
        struct disklabel *lp = rs->sc_dkdev.dk_label;
        struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
        RF_Raid_t *raidPtr;

        db1_printf(("Getting the disklabel...\n"));

        bzero(clp, sizeof(*clp));

        raidPtr = raidPtrs[unit];

        raidgetdefaultlabel(raidPtr, rs, lp);

        /*
         * Call the generic disklabel extraction routine.
         */
        errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
            rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
        if (errstring)
                raidmakedisklabel(rs);
        else {
                int     i;
                struct partition *pp;

                /*
                 * Sanity check whether the found disklabel is valid.
                 *
                 * This is necessary since the total size of the raid
                 * device may vary when the interleave is changed even
                 * though exactly the same components are used, and an
                 * old disklabel may be used if one is found.
1866 */
1867 if (lp->d_secperunit != rs->sc_size)
1868 printf("WARNING: %s: "
1869 "total number of sectors in disklabel (%d) != "
1870 "the size of raid (%ld)\n", rs->sc_xname,
1871 lp->d_secperunit, (long) rs->sc_size);
1872 for (i = 0; i < lp->d_npartitions; i++) {
1873 pp = &lp->d_partitions[i];
1874 if (pp->p_offset + pp->p_size > rs->sc_size)
1875 printf("WARNING: %s: end of partition `%c' "
1876 "exceeds the size of raid (%ld)\n",
1877 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1878 }
1879 }
1880
1881 }
1882 /*
1883 * Take care of things one might want to take care of in the event
1884 * that a disklabel isn't present.
1885 */
1886 static void
1887 raidmakedisklabel(rs)
1888 struct raid_softc *rs;
1889 {
1890 struct disklabel *lp = rs->sc_dkdev.dk_label;
1891 db1_printf(("Making a label...\n"));
1892
1893 /*
1894 * For historical reasons, if there's no disklabel present
1895 * the raw partition must be marked FS_BSDFFS.
1896 */
1897
1898 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1899
1900 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1901
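	/* Recompute the checksum last: dkcksum() XORs the label's 16-bit
	   words, so it must be taken over the final contents. */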
1902 lp->d_checksum = dkcksum(lp);
1903 }
1904 /*
1905 * Look up the provided name in the filesystem. If the file exists,
1906 * is a valid block device, and isn't being used by anyone else,
1907 * set *vpp to the file's vnode.
1908 * You'll find the original of this in ccd.c
1909 */
1910 int
1911 raidlookup(path, p, vpp)
1912 char *path;
1913 struct proc *p;
1914 struct vnode **vpp; /* result */
1915 {
1916 struct nameidata nd;
1917 struct vnode *vp;
1918 struct vattr va;
1919 int error;
1920
1921 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1922 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1923 #ifdef DEBUG
1924 printf("RAIDframe: vn_open returned %d\n", error);
1925 #endif
1926 return (error);
1927 }
1928 vp = nd.ni_vp;
1929 if (vp->v_usecount > 1) {
1930 VOP_UNLOCK(vp, 0);
1931 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1932 return (EBUSY);
1933 }
1934 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1935 VOP_UNLOCK(vp, 0);
1936 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1937 return (error);
1938 }
1939 /* XXX: eventually we should handle VREG, too. */
1940 if (va.va_type != VBLK) {
1941 VOP_UNLOCK(vp, 0);
1942 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1943 return (ENOTBLK);
1944 }
1945 VOP_UNLOCK(vp, 0);
1946 *vpp = vp;
1947 return (0);
1948 }
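
/*
 * Illustrative sketch (not compiled): looking up one component by
 * pathname at configuration time.  "component_name" is a hypothetical
 * placeholder for the string handed in by the configuration ioctl.
 */
#if 0
	struct vnode *vp;
	int retcode;

	retcode = raidlookup(component_name, curproc, &vp);
	if (retcode)
		printf("raidlookup on %s failed: %d\n",
		    component_name, retcode);
#endif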
1949 /*
1950 * Wait interruptibly for an exclusive lock.
1951 *
1952 * XXX
1953 * Several drivers do this; it should be abstracted and made MP-safe.
1954 * (Hmm... where have we seen this warning before :-> GO )
1955 */
1956 static int
1957 raidlock(rs)
1958 struct raid_softc *rs;
1959 {
1960 int error;
1961
1962 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1963 rs->sc_flags |= RAIDF_WANTED;
1964 if ((error =
1965 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1966 return (error);
1967 }
1968 rs->sc_flags |= RAIDF_LOCKED;
1969 return (0);
1970 }
1971 /*
1972 * Unlock and wake up any waiters.
1973 */
1974 static void
1975 raidunlock(rs)
1976 struct raid_softc *rs;
1977 {
1978
1979 rs->sc_flags &= ~RAIDF_LOCKED;
1980 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1981 rs->sc_flags &= ~RAIDF_WANTED;
1982 wakeup(rs);
1983 }
1984 }
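
/*
 * Illustrative sketch (not compiled): the usual pairing of raidlock()
 * and raidunlock() around changes to the unit's softc state.
 */
#if 0
	if ((error = raidlock(rs)) != 0)
		return (error);
	/* ... modify unit state, labels, etc. ... */
	raidunlock(rs);
#endif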
1985
1986
1987 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1988 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
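
/*
 * With DEV_BSIZE == 512 the component label thus starts at block
 * 16384 / 512 == 32 of each component and spans 1024 / 512 == 2 blocks,
 * in an area presumably covered by rf_protected_sectors (see the XXX
 * in raidread_component_label() below).
 */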
1989
1990 int
1991 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1992 {
1993 RF_ComponentLabel_t component_label;
1994 raidread_component_label(dev, b_vp, &component_label);
1995 component_label.mod_counter = mod_counter;
1996 component_label.clean = RF_RAID_CLEAN;
1997 raidwrite_component_label(dev, b_vp, &component_label);
1998 return(0);
1999 }
2000
2001
2002 int
2003 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2004 {
2005 RF_ComponentLabel_t component_label;
2006 raidread_component_label(dev, b_vp, &component_label);
2007 component_label.mod_counter = mod_counter;
2008 component_label.clean = RF_RAID_DIRTY;
2009 raidwrite_component_label(dev, b_vp, &component_label);
2010 return(0);
2011 }
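
/* XXX raidmarkclean() and raidmarkdirty() above ignore the return value
   of raidread_component_label(); after a failed read they would write a
   mostly-uninitialized label back out. */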
2012
2013 /* ARGSUSED */
2014 int
2015 raidread_component_label(dev, b_vp, component_label)
2016 dev_t dev;
2017 struct vnode *b_vp;
2018 RF_ComponentLabel_t *component_label;
2019 {
2020 struct buf *bp;
2021 int error;
2022
2023 /* XXX should probably ensure that we don't try to do this if
2024 someone has changed rf_protected_sectors. */
2025
2026 /* get a block of the appropriate size... */
2027 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2028 bp->b_dev = dev;
2029
2030 /* get our ducks in a row for the read */
2031 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2032 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2033 bp->b_flags = B_BUSY | B_READ;
2034 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
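	/* XXX b_resid is normally a byte count, but the value stored here
	   is a block count.  This appears harmless, since only b_error is
	   consulted after biowait(); the same pattern appears in
	   raidwrite_component_label() below. */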
2035
2036 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2037
2038 error = biowait(bp);
2039
2040 if (!error) {
2041 memcpy(component_label, bp->b_un.b_addr,
2042 sizeof(RF_ComponentLabel_t));
2043 #if 0
2044 printf("raidread_component_label: got component label:\n");
2045 printf("Version: %d\n",component_label->version);
2046 printf("Serial Number: %d\n",component_label->serial_number);
2047 printf("Mod counter: %d\n",component_label->mod_counter);
2048 printf("Row: %d\n", component_label->row);
2049 printf("Column: %d\n", component_label->column);
2050 printf("Num Rows: %d\n", component_label->num_rows);
2051 printf("Num Columns: %d\n", component_label->num_columns);
2052 printf("Clean: %d\n", component_label->clean);
2053 printf("Status: %d\n", component_label->status);
2054 #endif
2055 } else {
2056 printf("Failed to read RAID component label!\n");
2057 }
2058
2059 bp->b_flags = B_INVAL | B_AGE;
2060 brelse(bp);
2061 return(error);
2062 }
2063 /* ARGSUSED */
2064 int
2065 raidwrite_component_label(dev, b_vp, component_label)
2066 dev_t dev;
2067 struct vnode *b_vp;
2068 RF_ComponentLabel_t *component_label;
2069 {
2070 struct buf *bp;
2071 int error;
2072
2073 /* get a block of the appropriate size... */
2074 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2075 bp->b_dev = dev;
2076
2077 /* get our ducks in a row for the write */
2078 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2079 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2080 bp->b_flags = B_BUSY | B_WRITE;
2081 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2082
2083 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2084
2085 memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
2086
2087 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2088 error = biowait(bp);
2089 bp->b_flags = B_INVAL | B_AGE;
2090 brelse(bp);
2091 if (error) {
2092 printf("Failed to write RAID component info!\n");
2093 }
2094
2095 return(error);
2096 }
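
/*
 * Both label routines above bounce the label through a throwaway
 * buffer: geteblk() supplies a B_BUSY buffer outside the normal buffer
 * cache, and marking it B_INVAL | B_AGE before brelse() discards it
 * immediately rather than caching stale label data.
 */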
2097
2098 void
2099 rf_markalldirty( raidPtr )
2100 RF_Raid_t *raidPtr;
2101 {
2102 RF_ComponentLabel_t c_label;
2103 int r,c;
2104
2105 raidPtr->mod_counter++;
2106 for (r = 0; r < raidPtr->numRow; r++) {
2107 for (c = 0; c < raidPtr->numCol; c++) {
2108 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2109 raidread_component_label(
2110 raidPtr->Disks[r][c].dev,
2111 raidPtr->raid_cinfo[r][c].ci_vp,
2112 &c_label);
2113 if (c_label.status == rf_ds_spared) {
2114 /* XXX do something special...
2115 but whatever you do, don't
2116 try to access it!! */
2117 } else {
2118 #if 0
2119 c_label.status =
2120 raidPtr->Disks[r][c].status;
2121 raidwrite_component_label(
2122 raidPtr->Disks[r][c].dev,
2123 raidPtr->raid_cinfo[r][c].ci_vp,
2124 &c_label);
2125 #endif
2126 raidmarkdirty(
2127 raidPtr->Disks[r][c].dev,
2128 raidPtr->raid_cinfo[r][c].ci_vp,
2129 raidPtr->mod_counter);
2130 }
2131 }
2132 }
2133 }
2134 /* printf("Component labels marked dirty.\n"); */
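	/* NB: the disabled block below references sparecol, srow, scol,
	   i and j, none of which are declared in this function; it will
	   not compile as-is if re-enabled. */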
2135 #if 0
2136 for( c = 0; c < raidPtr->numSpare ; c++) {
2137 sparecol = raidPtr->numCol + c;
2138 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2139 /*
2140 *
2141 * XXX this is where we get fancy and map this spare
2142 * into its correct spot in the array.
2143 *
2144 */
2145 /*
2146 *
2147 * We claim this disk is "optimal" if it's
2148 * rf_ds_used_spare, as that means it should be
2149 * directly substitutable for the disk it replaced.
2150 * We note that too...
2151 *
2152 */
2153
2154 for(i=0;i<raidPtr->numRow;i++) {
2155 for(j=0;j<raidPtr->numCol;j++) {
2156 if ((raidPtr->Disks[i][j].spareRow ==
2157 r) &&
2158 (raidPtr->Disks[i][j].spareCol ==
2159 sparecol)) {
2160 srow = r;
2161 scol = sparecol;
2162 break;
2163 }
2164 }
2165 }
2166
2167 raidread_component_label(
2168 raidPtr->Disks[r][sparecol].dev,
2169 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2170 &c_label);
2171 /* make sure status is noted */
2172 c_label.version = RF_COMPONENT_LABEL_VERSION;
2173 c_label.mod_counter = raidPtr->mod_counter;
2174 c_label.serial_number = raidPtr->serial_number;
2175 c_label.row = srow;
2176 c_label.column = scol;
2177 c_label.num_rows = raidPtr->numRow;
2178 c_label.num_columns = raidPtr->numCol;
2179 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2180 c_label.status = rf_ds_optimal;
2181 raidwrite_component_label(
2182 raidPtr->Disks[r][sparecol].dev,
2183 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2184 &c_label);
2185 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2186 raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2187 }
2188 }
2189
2190 #endif
2191 }
2192
2193
2194 void
2195 rf_update_component_labels( raidPtr )
2196 RF_Raid_t *raidPtr;
2197 {
2198 RF_ComponentLabel_t c_label;
2199 int sparecol;
2200 int r,c;
2201 int i,j;
2202 int srow, scol;
2203
2204 srow = -1;
2205 scol = -1;
2206
2207 /* XXX should do extra checks to make sure things really are clean,
2208 rather than blindly setting the clean bit... */
2209
2210 raidPtr->mod_counter++;
2211
2212 for (r = 0; r < raidPtr->numRow; r++) {
2213 for (c = 0; c < raidPtr->numCol; c++) {
2214 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2215 raidread_component_label(
2216 raidPtr->Disks[r][c].dev,
2217 raidPtr->raid_cinfo[r][c].ci_vp,
2218 &c_label);
2219 /* make sure status is noted */
2220 c_label.status = rf_ds_optimal;
2221 raidwrite_component_label(
2222 raidPtr->Disks[r][c].dev,
2223 raidPtr->raid_cinfo[r][c].ci_vp,
2224 &c_label);
2225 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2226 raidmarkclean(
2227 raidPtr->Disks[r][c].dev,
2228 raidPtr->raid_cinfo[r][c].ci_vp,
2229 raidPtr->mod_counter);
2230 }
2231 }
2232 /* else we don't touch it... */
2233 #if 0
2234 else if (raidPtr->Disks[r][c].status !=
2235 rf_ds_failed) {
2236 raidread_component_label(
2237 raidPtr->Disks[r][c].dev,
2238 raidPtr->raid_cinfo[r][c].ci_vp,
2239 &c_label);
2240 /* make sure status is noted */
2241 c_label.status =
2242 raidPtr->Disks[r][c].status;
2243 raidwrite_component_label(
2244 raidPtr->Disks[r][c].dev,
2245 raidPtr->raid_cinfo[r][c].ci_vp,
2246 &c_label);
2247 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2248 raidmarkclean(
2249 raidPtr->Disks[r][c].dev,
2250 raidPtr->raid_cinfo[r][c].ci_vp,
2251 raidPtr->mod_counter);
2252 }
2253 }
2254 #endif
2255 }
2256 }
2257
2258 for( c = 0; c < raidPtr->numSpare ; c++) {
2259 sparecol = raidPtr->numCol + c;
2260 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2261 /*
2262 *
2263 * We claim this disk is "optimal" if it's
2264 * rf_ds_used_spare, as that means it should be
2265 * directly substitutable for the disk it replaced.
2266 * We note that too...
2267 *
2268 */
2269
2270 for(i=0;i<raidPtr->numRow;i++) {
2271 for(j=0;j<raidPtr->numCol;j++) {
2272 if ((raidPtr->Disks[i][j].spareRow ==
2273 0) &&
2274 (raidPtr->Disks[i][j].spareCol ==
2275 sparecol)) {
2276 srow = i;
2277 scol = j;
2278 break;
2279 }
2280 }
2281 }
2282
2283 raidread_component_label(
2284 raidPtr->Disks[0][sparecol].dev,
2285 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2286 &c_label);
2287 /* make sure status is noted */
2288 c_label.version = RF_COMPONENT_LABEL_VERSION;
2289 c_label.mod_counter = raidPtr->mod_counter;
2290 c_label.serial_number = raidPtr->serial_number;
2291 c_label.row = srow;
2292 c_label.column = scol;
2293 c_label.num_rows = raidPtr->numRow;
2294 c_label.num_columns = raidPtr->numCol;
2295 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2296 c_label.status = rf_ds_optimal;
2297 raidwrite_component_label(
2298 raidPtr->Disks[0][sparecol].dev,
2299 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2300 &c_label);
2301 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2302 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2303 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2304 raidPtr->mod_counter);
2305 }
2306 }
2307 }
2308 /* printf("Component labels updated\n"); */
2309 }
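
/*
 * Note: both routines above bump raidPtr->mod_counter before touching
 * any labels, and labels freshened here generally end up carrying the
 * incremented count (either directly or via raidmarkclean());
 * presumably this is what lets a later configuration pass identify the
 * most recent set of component labels.
 */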
2310