rf_netbsdkintf.c revision 1.23 1 /* $NetBSD: rf_netbsdkintf.c,v 1.23 1999/08/10 21:41:37 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/*
 * Debug verbosity for this file.  When DEBUG is defined, the db_printf and
 * db1_printf .. db5_printf macros print only if this value exceeds their
 * threshold.
 */
154 int rf_kdebug_level = 0;
155
/* Values stored in rf_kbooted. */
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
/*
 * Starts as RFK_BOOT_NONE.  raidattach() stores RFK_BOOT_GOOD once
 * rf_BootRaidframe() has returned 0, and raidstrategy() tests for that value
 * before doing any work.
 */
159 static int rf_kbooted = RFK_BOOT_NONE;
160
/*
 * Debug print helpers.  The argument is a complete, parenthesized printf()
 * argument list, e.g. db1_printf(("unit %d\n", unit)).
 *
 * db0_printf always prints.  With DEBUG defined, db_printf and db1_printf
 * print when rf_kdebug_level > 0, and dbN_printf prints when
 * rf_kdebug_level > N - 1.  Without DEBUG they expand to an empty statement.
 *
 * Every conditional or empty expansion is wrapped in do { } while (0) so the
 * macro is exactly one statement: it can be the body of an if that has an
 * else without capturing that else or causing a syntax error.
 */
#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a)  do { if (rf_kdebug_level > 0) printf a; } while (/* CONSTCOND */ 0)
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (/* CONSTCOND */ 0)
#define db2_printf(a) do { if (rf_kdebug_level > 1) printf a; } while (/* CONSTCOND */ 0)
#define db3_printf(a) do { if (rf_kdebug_level > 2) printf a; } while (/* CONSTCOND */ 0)
#define db4_printf(a) do { if (rf_kdebug_level > 3) printf a; } while (/* CONSTCOND */ 0)
#define db5_printf(a) do { if (rf_kdebug_level > 4) printf a; } while (/* CONSTCOND */ 0)
#else /* DEBUG */
#define db0_printf(a) printf a
#define db_printf(a)  do { } while (/* CONSTCOND */ 0)
#define db1_printf(a) do { } while (/* CONSTCOND */ 0)
#define db2_printf(a) do { } while (/* CONSTCOND */ 0)
#define db3_printf(a) do { } while (/* CONSTCOND */ 0)
#define db4_printf(a) do { } while (/* CONSTCOND */ 0)
#define db5_printf(a) do { } while (/* CONSTCOND */ 0)
#endif /* DEBUG */
177
/*
 * Table of per-unit RAID descriptors, indexed by unit number.  Both the
 * table and each entry are allocated in raidattach().
 */
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
/* Initialized with rf_mutex_init() in raidattach(). */
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
/* Both queues are reset to NULL in raidattach(). */
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate reconstruction requests */
190
191
/*
 * Simple lock plus its acquire/release wrappers.
 * NOTE(review): going by the names this guards recon_queue; no user of the
 * macros is visible in this part of the file -- confirm.
 */
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
/*
 * File-local helpers; their definitions are not in this part of the file.
 * KernelWakeupFunc has the same type as InitBP's cbFunc parameter
 * (void (*)(struct buf *)).
 */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
/*
 * Queue-debug trace helpers: when rf_queueDebug is non-zero, pass the format
 * string s and up to three arguments to rf_debug_printf(), filling the
 * remaining argument slots with NULL.
 *
 * Each expansion is wrapped in do { } while (0) so that it is one statement;
 * a bare "if" here would silently capture the "else" of an enclosing
 * if/else written around the macro call.
 */
#define Dprintf0(s) do { \
	if (rf_queueDebug) \
		rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); \
} while (/* CONSTCOND */ 0)
#define Dprintf1(s,a) do { \
	if (rf_queueDebug) \
		rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL); \
} while (/* CONSTCOND */ 0)
#define Dprintf2(s,a,b) do { \
	if (rf_queueDebug) \
		rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL); \
} while (/* CONSTCOND */ 0)
#define Dprintf3(s,a,b,c) do { \
	if (rf_queueDebug) \
		rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL); \
} while (/* CONSTCOND */ 0)
212
/* Not defined or called in the visible part of this file. */
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
/* Defined below: driver initialization and partition-size query. */
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
/* raidinit() is called from raidioctl() after a successful rf_Configure(). */
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
/* Open/close/ioctl/read/write/strategy/dump routines defined below. */
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
/*
 * Component-label helpers.  raidioctl() calls the read/write routines with a
 * component's dev_t and vnode; raidclose() calls rf_update_component_labels()
 * when the last partition of an initialized unit is closed.
 */
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
 * Pilfered from ccd.c
 *
 * Wrapper around the struct buf issued for one piece of an I/O, carrying a
 * pointer back to the original buf and to the disk-queue request it
 * belongs to.
 * NOTE(review): rf_buf presumably has to be first so that a struct buf *
 * can be converted to the enclosing struct raidbuf * -- confirm against the
 * completion code, which is outside this part of the file.
 */
237
238 struct raidbuf {
239 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
240 struct buf *rf_obp; /* ptr. to original I/O buf */
241 int rf_flags; /* misc. flags */
242 RF_DiskQueueData_t *req;/* the request that this was part of.. */
243 };
244
245
/*
 * Take a raidbuf from / return one to the unit's sc_cbufpool.
 * NOTE(review): the get uses PR_NOWAIT, so it presumably can return NULL
 * instead of sleeping (see pool(9)) -- callers should check.
 */
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */
251
/*
 * Per-unit driver state.  raidattach() allocates and zeroes an array of
 * these (raid_softc[]), indexed by unit number, alongside raidPtrs[].
 */
252 struct raid_softc {
253 int sc_flags; /* RAIDF_* flags, defined below */
254 int sc_cflags; /* configuration flags */
255 size_t sc_size; /* size of the raid device */
256 dev_t sc_dev; /* our device.. */
257 char sc_xname[20]; /* XXX external name */
258 struct disk sc_dkdev; /* generic disk device info (label, open masks) */
259 struct pool sc_cbufpool; /* component buffer pool, see RAIDGETBUF/RAIDPUTBUF */
260 };
261 /* sc_flags */
262 #define RAIDF_INITED 0x01 /* unit has been initialized */
263 #define RAIDF_WLABEL 0x02 /* label area is writable */
264 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
265 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
266 #define RAIDF_LOCKED 0x80 /* unit is locked */
267
/* Maps a dev_t to the index used for raid_softc[] and raidPtrs[]. */
268 #define raidunit(x) DISKUNIT(x)
/*
 * Number of usable units.  Stays 0 until raidattach() has allocated
 * raid_softc[]; the entry points reject any unit >= numraid.
 */
269 static int numraid = 0;
270
271 /*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes...
 *
 * The #ifndef lets the value be supplied from outside this file.  It is
 * copied into the unit's RF_Raid_t "openings" field when a set is
 * configured through raidioctl().
 */
276
277 #ifndef RAIDOUTSTANDING
278 #define RAIDOUTSTANDING 10
279 #endif
280
/* dev_t for partition RAW_PART on the same major number and unit as dev. */
281 #define RAIDLABELDEV(dev) \
282 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
283
284 /* declared here, and made public, for the benefit of KVM stuff.. */
/* Array of numraid entries, allocated in raidattach(). */
285 struct raid_softc *raid_softc;
286
/* Disklabel helpers; raidgetdisklabel() is called from raidopen(). */
287 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
288 struct disklabel *));
289 static void raidgetdisklabel __P((dev_t));
290 static void raidmakedisklabel __P((struct raid_softc *));
291
/*
 * Per-unit lock used by raidopen(), raidclose() and raidioctl().
 * raidlock() returns non-zero on failure.
 */
292 static int raidlock __P((struct raid_softc *));
293 static void raidunlock __P((struct raid_softc *));
294 int raidlookup __P((char *, struct proc * p, struct vnode **));
295
/* Called on the first open of an initialized unit and after configuration. */
296 static void rf_markalldirty __P((RF_Raid_t *));
297
298 void
299 raidattach(num)
300 int num;
301 {
302 int raidID;
303 int i, rc;
304
305 #ifdef DEBUG
306 printf("raidattach: Asked for %d units\n", num);
307 #endif
308
309 if (num <= 0) {
310 #ifdef DIAGNOSTIC
311 panic("raidattach: count <= 0");
312 #endif
313 return;
314 }
315 /* This is where all the initialization stuff gets done. */
316
317 /* Make some space for requested number of units... */
318
319 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
320 if (raidPtrs == NULL) {
321 panic("raidPtrs is NULL!!\n");
322 }
323
324 rc = rf_mutex_init(&rf_sparet_wait_mutex);
325 if (rc) {
326 RF_PANIC();
327 }
328
329 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
330 recon_queue = NULL;
331
332 for (i = 0; i < numraid; i++)
333 raidPtrs[i] = NULL;
334 rc = rf_BootRaidframe();
335 if (rc == 0)
336 printf("Kernelized RAIDframe activated\n");
337 else
338 panic("Serious error booting RAID!!\n");
339
340 rf_kbooted = RFK_BOOT_GOOD;
341
342 /* put together some datastructures like the CCD device does.. This
343 * lets us lock the device and what-not when it gets opened. */
344
345 raid_softc = (struct raid_softc *)
346 malloc(num * sizeof(struct raid_softc),
347 M_RAIDFRAME, M_NOWAIT);
348 if (raid_softc == NULL) {
349 printf("WARNING: no memory for RAIDframe driver\n");
350 return;
351 }
352 numraid = num;
353 bzero(raid_softc, num * sizeof(struct raid_softc));
354
355 for (raidID = 0; raidID < num; raidID++) {
356 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
357 (RF_Raid_t *));
358 if (raidPtrs[raidID] == NULL) {
359 printf("raidPtrs[%d] is NULL\n", raidID);
360 }
361 }
362 }
363
364
365 int
366 raidsize(dev)
367 dev_t dev;
368 {
369 struct raid_softc *rs;
370 struct disklabel *lp;
371 int part, unit, omask, size;
372
373 unit = raidunit(dev);
374 if (unit >= numraid)
375 return (-1);
376 rs = &raid_softc[unit];
377
378 if ((rs->sc_flags & RAIDF_INITED) == 0)
379 return (-1);
380
381 part = DISKPART(dev);
382 omask = rs->sc_dkdev.dk_openmask & (1 << part);
383 lp = rs->sc_dkdev.dk_label;
384
385 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
386 return (-1);
387
388 if (lp->d_partitions[part].p_fstype != FS_SWAP)
389 size = -1;
390 else
391 size = lp->d_partitions[part].p_size *
392 (lp->d_secsize / DEV_BSIZE);
393
394 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
395 return (-1);
396
397 return (size);
398
399 }
400
401 int
402 raiddump(dev, blkno, va, size)
403 dev_t dev;
404 daddr_t blkno;
405 caddr_t va;
406 size_t size;
407 {
408 /* Not implemented. */
409 return ENXIO;
410 }
411 /* ARGSUSED */
412 int
413 raidopen(dev, flags, fmt, p)
414 dev_t dev;
415 int flags, fmt;
416 struct proc *p;
417 {
418 int unit = raidunit(dev);
419 struct raid_softc *rs;
420 struct disklabel *lp;
421 int part, pmask;
422 int error = 0;
423
424 if (unit >= numraid)
425 return (ENXIO);
426 rs = &raid_softc[unit];
427
428 if ((error = raidlock(rs)) != 0)
429 return (error);
430 lp = rs->sc_dkdev.dk_label;
431
432 part = DISKPART(dev);
433 pmask = (1 << part);
434
435 db1_printf(("Opening raid device number: %d partition: %d\n",
436 unit, part));
437
438
439 if ((rs->sc_flags & RAIDF_INITED) &&
440 (rs->sc_dkdev.dk_openmask == 0))
441 raidgetdisklabel(dev);
442
443 /* make sure that this partition exists */
444
445 if (part != RAW_PART) {
446 db1_printf(("Not a raw partition..\n"));
447 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
448 ((part >= lp->d_npartitions) ||
449 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
450 error = ENXIO;
451 raidunlock(rs);
452 db1_printf(("Bailing out...\n"));
453 return (error);
454 }
455 }
456 /* Prevent this unit from being unconfigured while open. */
457 switch (fmt) {
458 case S_IFCHR:
459 rs->sc_dkdev.dk_copenmask |= pmask;
460 break;
461
462 case S_IFBLK:
463 rs->sc_dkdev.dk_bopenmask |= pmask;
464 break;
465 }
466
467 if ((rs->sc_dkdev.dk_openmask == 0) &&
468 ((rs->sc_flags & RAIDF_INITED) != 0)) {
469 /* First one... mark things as dirty... Note that we *MUST*
470 have done a configure before this. I DO NOT WANT TO BE
471 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
472 THAT THEY BELONG TOGETHER!!!!! */
473 /* XXX should check to see if we're only open for reading
474 here... If so, we needn't do this, but then need some
475 other way of keeping track of what's happened.. */
476
477 rf_markalldirty( raidPtrs[unit] );
478 }
479
480
481 rs->sc_dkdev.dk_openmask =
482 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
483
484 raidunlock(rs);
485
486 return (error);
487
488
489 }
490 /* ARGSUSED */
491 int
492 raidclose(dev, flags, fmt, p)
493 dev_t dev;
494 int flags, fmt;
495 struct proc *p;
496 {
497 int unit = raidunit(dev);
498 struct raid_softc *rs;
499 int error = 0;
500 int part;
501
502 if (unit >= numraid)
503 return (ENXIO);
504 rs = &raid_softc[unit];
505
506 if ((error = raidlock(rs)) != 0)
507 return (error);
508
509 part = DISKPART(dev);
510
511 /* ...that much closer to allowing unconfiguration... */
512 switch (fmt) {
513 case S_IFCHR:
514 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
515 break;
516
517 case S_IFBLK:
518 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
519 break;
520 }
521 rs->sc_dkdev.dk_openmask =
522 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
523
524 if ((rs->sc_dkdev.dk_openmask == 0) &&
525 ((rs->sc_flags & RAIDF_INITED) != 0)) {
526 /* Last one... device is not unconfigured yet.
527 Device shutdown has taken care of setting the
528 clean bits if RAIDF_INITED is not set
529 mark things as clean... */
530 rf_update_component_labels( raidPtrs[unit] );
531 }
532
533 raidunlock(rs);
534 return (0);
535
536 }
537
538 void
539 raidstrategy(bp)
540 register struct buf *bp;
541 {
542 register int s;
543
544 unsigned int raidID = raidunit(bp->b_dev);
545 RF_Raid_t *raidPtr;
546 struct raid_softc *rs = &raid_softc[raidID];
547 struct disklabel *lp;
548 int wlabel;
549
550 #if 0
551 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
552 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
553 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
554 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
555 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
556
557 if (bp->b_flags & B_READ)
558 db1_printf(("READ\n"));
559 else
560 db1_printf(("WRITE\n"));
561 #endif
562 if (rf_kbooted != RFK_BOOT_GOOD)
563 return;
564 if (raidID >= numraid || !raidPtrs[raidID]) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 raidPtr = raidPtrs[raidID];
572 if (!raidPtr->valid) {
573 bp->b_error = ENODEV;
574 bp->b_flags |= B_ERROR;
575 bp->b_resid = bp->b_bcount;
576 biodone(bp);
577 return;
578 }
579 if (bp->b_bcount == 0) {
580 db1_printf(("b_bcount is zero..\n"));
581 biodone(bp);
582 return;
583 }
584 lp = rs->sc_dkdev.dk_label;
585
586 /*
587 * Do bounds checking and adjust transfer. If there's an
588 * error, the bounds check will flag that for us.
589 */
590
591 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
592 if (DISKPART(bp->b_dev) != RAW_PART)
593 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
594 db1_printf(("Bounds check failed!!:%d %d\n",
595 (int) bp->b_blkno, (int) wlabel));
596 biodone(bp);
597 return;
598 }
599 s = splbio(); /* XXX Needed? */
600 db1_printf(("Beginning strategy...\n"));
601
602 bp->b_resid = 0;
603 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
604 NULL, NULL, NULL);
605 if (bp->b_error) {
606 bp->b_flags |= B_ERROR;
607 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
608 bp->b_error));
609 }
610 splx(s);
611 #if 0
612 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
613 bp, bp->b_data,
614 (int) bp->b_bcount, (int) bp->b_resid));
615 #endif
616 }
617 /* ARGSUSED */
618 int
619 raidread(dev, uio, flags)
620 dev_t dev;
621 struct uio *uio;
622 int flags;
623 {
624 int unit = raidunit(dev);
625 struct raid_softc *rs;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((rs->sc_flags & RAIDF_INITED) == 0)
633 return (ENXIO);
634 part = DISKPART(dev);
635
636 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
637
638 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
639
640 }
641 /* ARGSUSED */
642 int
643 raidwrite(dev, uio, flags)
644 dev_t dev;
645 struct uio *uio;
646 int flags;
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650
651 if (unit >= numraid)
652 return (ENXIO);
653 rs = &raid_softc[unit];
654
655 if ((rs->sc_flags & RAIDF_INITED) == 0)
656 return (ENXIO);
657 db1_printf(("raidwrite\n"));
658 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
659
660 }
661
662 int
663 raidioctl(dev, cmd, data, flag, p)
664 dev_t dev;
665 u_long cmd;
666 caddr_t data;
667 int flag;
668 struct proc *p;
669 {
670 int unit = raidunit(dev);
671 int error = 0;
672 int part, pmask;
673 struct raid_softc *rs;
674 #if 0
675 int r, c;
676 #endif
677 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
678
679 /* struct ccdbuf *cbp; */
680 /* struct raidbuf *raidbp; */
681 RF_Config_t *k_cfg, *u_cfg;
682 u_char *specific_buf;
683 int retcode = 0;
684 int row;
685 int column;
686 int s;
687 struct rf_recon_req *rrcopy, *rr;
688 RF_ComponentLabel_t *component_label;
689 RF_ComponentLabel_t ci_label;
690 RF_ComponentLabel_t **c_label_ptr;
691 RF_SingleComponent_t *sparePtr,*componentPtr;
692 RF_SingleComponent_t hot_spare;
693 RF_SingleComponent_t component;
694
695 if (unit >= numraid)
696 return (ENXIO);
697 rs = &raid_softc[unit];
698
699 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
700 (int) DISKPART(dev), (int) unit, (int) cmd));
701
702 /* Must be open for writes for these commands... */
703 switch (cmd) {
704 case DIOCSDINFO:
705 case DIOCWDINFO:
706 case DIOCWLABEL:
707 if ((flag & FWRITE) == 0)
708 return (EBADF);
709 }
710
711 /* Must be initialized for these... */
712 switch (cmd) {
713 case DIOCGDINFO:
714 case DIOCSDINFO:
715 case DIOCWDINFO:
716 case DIOCGPART:
717 case DIOCWLABEL:
718 case DIOCGDEFLABEL:
719 case RAIDFRAME_SHUTDOWN:
720 case RAIDFRAME_REWRITEPARITY:
721 case RAIDFRAME_GET_INFO:
722 case RAIDFRAME_RESET_ACCTOTALS:
723 case RAIDFRAME_GET_ACCTOTALS:
724 case RAIDFRAME_KEEP_ACCTOTALS:
725 case RAIDFRAME_GET_SIZE:
726 case RAIDFRAME_FAIL_DISK:
727 case RAIDFRAME_COPYBACK:
728 case RAIDFRAME_CHECKRECON:
729 case RAIDFRAME_GET_COMPONENT_LABEL:
730 case RAIDFRAME_SET_COMPONENT_LABEL:
731 case RAIDFRAME_ADD_HOT_SPARE:
732 case RAIDFRAME_REMOVE_HOT_SPARE:
733 case RAIDFRAME_INIT_LABELS:
734 case RAIDFRAME_REBUILD_IN_PLACE:
735 case RAIDFRAME_CHECK_PARITY:
736 if ((rs->sc_flags & RAIDF_INITED) == 0)
737 return (ENXIO);
738 }
739
740 switch (cmd) {
741
742
743 /* configure the system */
744 case RAIDFRAME_CONFIGURE:
745
746 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
747 /* copy-in the configuration information */
748 /* data points to a pointer to the configuration structure */
749 u_cfg = *((RF_Config_t **) data);
750 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
751 if (k_cfg == NULL) {
752 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
753 return (ENOMEM);
754 }
755 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
756 sizeof(RF_Config_t));
757 if (retcode) {
758 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
759 retcode));
760 return (retcode);
761 }
762 /* allocate a buffer for the layout-specific data, and copy it
763 * in */
764 if (k_cfg->layoutSpecificSize) {
765 if (k_cfg->layoutSpecificSize > 10000) {
766 /* sanity check */
767 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
768 return (EINVAL);
769 }
770 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
771 (u_char *));
772 if (specific_buf == NULL) {
773 RF_Free(k_cfg, sizeof(RF_Config_t));
774 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
775 return (ENOMEM);
776 }
777 retcode = copyin(k_cfg->layoutSpecific,
778 (caddr_t) specific_buf,
779 k_cfg->layoutSpecificSize);
780 if (retcode) {
781 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
782 retcode));
783 return (retcode);
784 }
785 } else
786 specific_buf = NULL;
787 k_cfg->layoutSpecific = specific_buf;
788
789 /* should do some kind of sanity check on the configuration.
790 * Store the sum of all the bytes in the last byte? */
791
792 #if 0
793 db1_printf(("Considering configuring the system.:%d 0x%x\n",
794 unit, p));
795 #endif
796
797 /* We need the pointer to this a little deeper, so stash it
798 * here... */
799
800 raidPtrs[unit]->proc = p;
801
802 /* configure the system */
803
804 raidPtrs[unit]->raidid = unit;
805
806 retcode = rf_Configure(raidPtrs[unit], k_cfg);
807
808 /* allow this many simultaneous IO's to this RAID device */
809 raidPtrs[unit]->openings = RAIDOUTSTANDING;
810
811 if (retcode == 0) {
812 retcode = raidinit(dev, raidPtrs[unit], unit);
813 rf_markalldirty( raidPtrs[unit] );
814 }
815 /* free the buffers. No return code here. */
816 if (k_cfg->layoutSpecificSize) {
817 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
818 }
819 RF_Free(k_cfg, sizeof(RF_Config_t));
820
821 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
822 retcode));
823
824 return (retcode);
825
826 /* shutdown the system */
827 case RAIDFRAME_SHUTDOWN:
828
829 if ((error = raidlock(rs)) != 0)
830 return (error);
831
832 /*
833 * If somebody has a partition mounted, we shouldn't
834 * shutdown.
835 */
836
837 part = DISKPART(dev);
838 pmask = (1 << part);
839 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
840 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
841 (rs->sc_dkdev.dk_copenmask & pmask))) {
842 raidunlock(rs);
843 return (EBUSY);
844 }
845
846 if (rf_debugKernelAccess) {
847 printf("call shutdown\n");
848 }
849 raidPtrs[unit]->proc = p; /* XXX necessary evil */
850
851 retcode = rf_Shutdown(raidPtrs[unit]);
852
853 db1_printf(("Done main shutdown\n"));
854
855 pool_destroy(&rs->sc_cbufpool);
856 db1_printf(("Done freeing component buffer freelist\n"));
857
858 /* It's no longer initialized... */
859 rs->sc_flags &= ~RAIDF_INITED;
860
861 /* Detach the disk. */
862 disk_detach(&rs->sc_dkdev);
863
864 raidunlock(rs);
865
866 return (retcode);
867 case RAIDFRAME_GET_COMPONENT_LABEL:
868 c_label_ptr = (RF_ComponentLabel_t **) data;
869 /* need to read the component label for the disk indicated
870 by row,column in component_label
871 XXX need to sanity check these values!!!
872 */
873
874 /* For practice, let's get it directly fromdisk, rather
875 than from the in-core copy */
876 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
877 (RF_ComponentLabel_t *));
878 if (component_label == NULL)
879 return (ENOMEM);
880
881 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
882
883 retcode = copyin( *c_label_ptr, component_label,
884 sizeof(RF_ComponentLabel_t));
885
886 if (retcode) {
887 return(retcode);
888 }
889
890 row = component_label->row;
891 printf("Row: %d\n",row);
892 if (row > raidPtrs[unit]->numRow) {
893 row = 0; /* XXX */
894 }
895 column = component_label->column;
896 printf("Column: %d\n",column);
897 if (column > raidPtrs[unit]->numCol) {
898 column = 0; /* XXX */
899 }
900
901 raidread_component_label(
902 raidPtrs[unit]->Disks[row][column].dev,
903 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
904 component_label );
905
906 retcode = copyout((caddr_t) component_label,
907 (caddr_t) *c_label_ptr,
908 sizeof(RF_ComponentLabel_t));
909 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
910 return (retcode);
911
912 case RAIDFRAME_SET_COMPONENT_LABEL:
913 component_label = (RF_ComponentLabel_t *) data;
914
915 /* XXX check the label for valid stuff... */
916 /* Note that some things *should not* get modified --
917 the user should be re-initing the labels instead of
918 trying to patch things.
919 */
920
921 printf("Got component label:\n");
922 printf("Version: %d\n",component_label->version);
923 printf("Serial Number: %d\n",component_label->serial_number);
924 printf("Mod counter: %d\n",component_label->mod_counter);
925 printf("Row: %d\n", component_label->row);
926 printf("Column: %d\n", component_label->column);
927 printf("Num Rows: %d\n", component_label->num_rows);
928 printf("Num Columns: %d\n", component_label->num_columns);
929 printf("Clean: %d\n", component_label->clean);
930 printf("Status: %d\n", component_label->status);
931
932 row = component_label->row;
933 column = component_label->column;
934
935 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
936 (column < 0) || (column > raidPtrs[unit]->numCol)) {
937 return(EINVAL);
938 }
939
940 /* XXX this isn't allowed to do anything for now :-) */
941 #if 0
942 raidwrite_component_label(
943 raidPtrs[unit]->Disks[row][column].dev,
944 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
945 component_label );
946 #endif
947 return (0);
948
949 case RAIDFRAME_INIT_LABELS:
950 component_label = (RF_ComponentLabel_t *) data;
951 /*
952 we only want the serial number from
953 the above. We get all the rest of the information
954 from the config that was used to create this RAID
955 set.
956 */
957
958 raidPtrs[unit]->serial_number = component_label->serial_number;
959 /* current version number */
960 ci_label.version = RF_COMPONENT_LABEL_VERSION;
961 ci_label.serial_number = component_label->serial_number;
962 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
963 ci_label.num_rows = raidPtrs[unit]->numRow;
964 ci_label.num_columns = raidPtrs[unit]->numCol;
965 ci_label.clean = RF_RAID_DIRTY; /* not clean */
966 ci_label.status = rf_ds_optimal; /* "It's good!" */
967
968 for(row=0;row<raidPtrs[unit]->numRow;row++) {
969 ci_label.row = row;
970 for(column=0;column<raidPtrs[unit]->numCol;column++) {
971 ci_label.column = column;
972 raidwrite_component_label(
973 raidPtrs[unit]->Disks[row][column].dev,
974 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
975 &ci_label );
976 }
977 }
978
979 return (retcode);
980
981 /* initialize all parity */
982 case RAIDFRAME_REWRITEPARITY:
983
984 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
985 /* Parity for RAID 0 is trivially correct */
986 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
987 return(0);
988 }
989
990 /* borrow the thread of the requesting process */
991 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
992 s = splbio();
993 retcode = rf_RewriteParity(raidPtrs[unit]);
994 splx(s);
995 /* return I/O Error if the parity rewrite fails */
996
997 if (retcode) {
998 retcode = EIO;
999 } else {
1000 /* set the clean bit! If we shutdown correctly,
1001 the clean bit on each component label will get
1002 set */
1003 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1004 }
1005 return (retcode);
1006
1007
1008 case RAIDFRAME_ADD_HOT_SPARE:
1009 sparePtr = (RF_SingleComponent_t *) data;
1010 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1011 printf("Adding spare\n");
1012 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1013 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1014 return(retcode);
1015
1016 case RAIDFRAME_REMOVE_HOT_SPARE:
1017 return(retcode);
1018
1019 case RAIDFRAME_REBUILD_IN_PLACE:
1020 componentPtr = (RF_SingleComponent_t *) data;
1021 memcpy( &component, componentPtr,
1022 sizeof(RF_SingleComponent_t));
1023 row = component.row;
1024 column = component.column;
1025 printf("Rebuild: %d %d\n",row, column);
1026 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1027 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1028 return(EINVAL);
1029 }
1030 printf("Attempting a rebuild in place\n");
1031 s = splbio();
1032 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1033 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1034 splx(s);
1035 return(retcode);
1036
1037 /* issue a test-unit-ready through raidframe to the indicated
1038 * device */
1039 #if 0 /* XXX not supported yet (ever?) */
1040 case RAIDFRAME_TUR:
1041 /* debug only */
1042 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1043 return (retcode);
1044 #endif
1045 case RAIDFRAME_GET_INFO:
1046 {
1047 RF_Raid_t *raid = raidPtrs[unit];
1048 RF_DeviceConfig_t *cfg, **ucfgp;
1049 int i, j, d;
1050
1051 if (!raid->valid)
1052 return (ENODEV);
1053 ucfgp = (RF_DeviceConfig_t **) data;
1054 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1055 (RF_DeviceConfig_t *));
1056 if (cfg == NULL)
1057 return (ENOMEM);
1058 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1059 cfg->rows = raid->numRow;
1060 cfg->cols = raid->numCol;
1061 cfg->ndevs = raid->numRow * raid->numCol;
1062 if (cfg->ndevs >= RF_MAX_DISKS) {
1063 cfg->ndevs = 0;
1064 return (ENOMEM);
1065 }
1066 cfg->nspares = raid->numSpare;
1067 if (cfg->nspares >= RF_MAX_DISKS) {
1068 cfg->nspares = 0;
1069 return (ENOMEM);
1070 }
1071 cfg->maxqdepth = raid->maxQueueDepth;
1072 d = 0;
1073 for (i = 0; i < cfg->rows; i++) {
1074 for (j = 0; j < cfg->cols; j++) {
1075 cfg->devs[d] = raid->Disks[i][j];
1076 d++;
1077 }
1078 }
1079 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1080 cfg->spares[i] = raid->Disks[0][j];
1081 }
1082 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1083 sizeof(RF_DeviceConfig_t));
1084 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1085
1086 return (retcode);
1087 }
1088 break;
1089 case RAIDFRAME_CHECK_PARITY:
1090 *(int *) data = raidPtrs[unit]->parity_good;
1091 return (0);
1092 case RAIDFRAME_RESET_ACCTOTALS:
1093 {
1094 RF_Raid_t *raid = raidPtrs[unit];
1095
1096 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1097 return (0);
1098 }
1099 break;
1100
1101 case RAIDFRAME_GET_ACCTOTALS:
1102 {
1103 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1104 RF_Raid_t *raid = raidPtrs[unit];
1105
1106 *totals = raid->acc_totals;
1107 return (0);
1108 }
1109 break;
1110
1111 case RAIDFRAME_KEEP_ACCTOTALS:
1112 {
1113 RF_Raid_t *raid = raidPtrs[unit];
1114 int *keep = (int *) data;
1115
1116 raid->keep_acc_totals = *keep;
1117 return (0);
1118 }
1119 break;
1120
1121 case RAIDFRAME_GET_SIZE:
1122 *(int *) data = raidPtrs[unit]->totalSectors;
1123 return (0);
1124
1125 #define RAIDFRAME_RECON 1
1126 /* XXX The above should probably be set somewhere else!! GO */
1127 #if RAIDFRAME_RECON > 0
1128
1129 /* fail a disk & optionally start reconstruction */
1130 case RAIDFRAME_FAIL_DISK:
1131 rr = (struct rf_recon_req *) data;
1132
1133 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1134 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1135 return (EINVAL);
1136
1137 printf("raid%d: Failing the disk: row: %d col: %d\n",
1138 unit, rr->row, rr->col);
1139
1140 /* make a copy of the recon request so that we don't rely on
1141 * the user's buffer */
1142 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1143 bcopy(rr, rrcopy, sizeof(*rr));
1144 rrcopy->raidPtr = (void *) raidPtrs[unit];
1145
1146 LOCK_RECON_Q_MUTEX();
1147 rrcopy->next = recon_queue;
1148 recon_queue = rrcopy;
1149 wakeup(&recon_queue);
1150 UNLOCK_RECON_Q_MUTEX();
1151
1152 return (0);
1153
1154 /* invoke a copyback operation after recon on whatever disk
1155 * needs it, if any */
1156 case RAIDFRAME_COPYBACK:
1157 /* borrow the current thread to get this done */
1158 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1159 s = splbio();
1160 rf_CopybackReconstructedData(raidPtrs[unit]);
1161 splx(s);
1162 return (0);
1163
1164 /* return the percentage completion of reconstruction */
1165 case RAIDFRAME_CHECKRECON:
1166 row = *(int *) data;
1167 if (row < 0 || row >= raidPtrs[unit]->numRow)
1168 return (EINVAL);
1169 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1170 *(int *) data = 100;
1171 else
1172 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1173 return (0);
1174
1175 /* the sparetable daemon calls this to wait for the kernel to
1176 * need a spare table. this ioctl does not return until a
1177 * spare table is needed. XXX -- calling mpsleep here in the
1178 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1179 * -- I should either compute the spare table in the kernel,
1180 * or have a different -- XXX XXX -- interface (a different
1181 * character device) for delivering the table -- XXX */
1182 #if 0
1183 case RAIDFRAME_SPARET_WAIT:
1184 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1185 while (!rf_sparet_wait_queue)
1186 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1187 waitreq = rf_sparet_wait_queue;
1188 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1189 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1190
1191 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1192
1193 RF_Free(waitreq, sizeof(*waitreq));
1194 return (0);
1195
1196
1197 /* wakes up a process waiting on SPARET_WAIT and puts an error
1198 * code in it that will cause the dameon to exit */
1199 case RAIDFRAME_ABORT_SPARET_WAIT:
1200 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1201 waitreq->fcol = -1;
1202 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1203 waitreq->next = rf_sparet_wait_queue;
1204 rf_sparet_wait_queue = waitreq;
1205 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1206 wakeup(&rf_sparet_wait_queue);
1207 return (0);
1208
1209 /* used by the spare table daemon to deliver a spare table
1210 * into the kernel */
1211 case RAIDFRAME_SEND_SPARET:
1212
1213 /* install the spare table */
1214 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1215
1216 /* respond to the requestor. the return status of the spare
1217 * table installation is passed in the "fcol" field */
1218 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1219 waitreq->fcol = retcode;
1220 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1221 waitreq->next = rf_sparet_resp_queue;
1222 rf_sparet_resp_queue = waitreq;
1223 wakeup(&rf_sparet_resp_queue);
1224 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1225
1226 return (retcode);
1227 #endif
1228
1229
1230 #endif /* RAIDFRAME_RECON > 0 */
1231
1232 default:
1233 break; /* fall through to the os-specific code below */
1234
1235 }
1236
1237 if (!raidPtrs[unit]->valid)
1238 return (EINVAL);
1239
1240 /*
1241 * Add support for "regular" device ioctls here.
1242 */
1243
1244 switch (cmd) {
1245 case DIOCGDINFO:
1246 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1247 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1248 break;
1249
1250 case DIOCGPART:
1251 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1252 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1253 ((struct partinfo *) data)->part =
1254 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1255 break;
1256
1257 case DIOCWDINFO:
1258 db1_printf(("DIOCWDINFO\n"));
1259 case DIOCSDINFO:
1260 db1_printf(("DIOCSDINFO\n"));
1261 if ((error = raidlock(rs)) != 0)
1262 return (error);
1263
1264 rs->sc_flags |= RAIDF_LABELLING;
1265
1266 error = setdisklabel(rs->sc_dkdev.dk_label,
1267 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1268 if (error == 0) {
1269 if (cmd == DIOCWDINFO)
1270 error = writedisklabel(RAIDLABELDEV(dev),
1271 raidstrategy, rs->sc_dkdev.dk_label,
1272 rs->sc_dkdev.dk_cpulabel);
1273 }
1274 rs->sc_flags &= ~RAIDF_LABELLING;
1275
1276 raidunlock(rs);
1277
1278 if (error)
1279 return (error);
1280 break;
1281
1282 case DIOCWLABEL:
1283 db1_printf(("DIOCWLABEL\n"));
1284 if (*(int *) data != 0)
1285 rs->sc_flags |= RAIDF_WLABEL;
1286 else
1287 rs->sc_flags &= ~RAIDF_WLABEL;
1288 break;
1289
1290 case DIOCGDEFLABEL:
1291 db1_printf(("DIOCGDEFLABEL\n"));
1292 raidgetdefaultlabel(raidPtrs[unit], rs,
1293 (struct disklabel *) data);
1294 break;
1295
1296 default:
1297 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1298 }
1299 return (retcode);
1300
1301 }
1302
1303
1304 /* raidinit -- complete the rest of the initialization for the
1305 RAIDframe device. */
1306
1307
1308 static int
1309 raidinit(dev, raidPtr, unit)
1310 dev_t dev;
1311 RF_Raid_t *raidPtr;
1312 int unit;
1313 {
1314 int retcode;
1315 /* int ix; */
1316 /* struct raidbuf *raidbp; */
1317 struct raid_softc *rs;
1318
1319 retcode = 0;
1320
1321 rs = &raid_softc[unit];
1322 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1323 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1324
1325
1326 /* XXX should check return code first... */
1327 rs->sc_flags |= RAIDF_INITED;
1328
1329 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1330
1331 rs->sc_dkdev.dk_name = rs->sc_xname;
1332
1333 /* disk_attach actually creates space for the CPU disklabel, among
1334 * other things, so it's critical to call this *BEFORE* we try putzing
1335 * with disklabels. */
1336
1337 disk_attach(&rs->sc_dkdev);
1338
1339 /* XXX There may be a weird interaction here between this, and
1340 * protectedSectors, as used in RAIDframe. */
1341
1342 rs->sc_size = raidPtr->totalSectors;
1343 rs->sc_dev = dev;
1344
1345 return (retcode);
1346 }
1347
1348 /*
1349 * This kernel thread never exits. It is created once, and persists
1350 * until the system reboots.
1351 */
1352
1353 void
1354 rf_ReconKernelThread()
1355 {
1356 struct rf_recon_req *req;
1357 int s;
1358
1359 /* XXX not sure what spl() level we should be at here... probably
1360 * splbio() */
1361 s = splbio();
1362
1363 while (1) {
1364 /* grab the next reconstruction request from the queue */
1365 LOCK_RECON_Q_MUTEX();
1366 while (!recon_queue) {
1367 UNLOCK_RECON_Q_MUTEX();
1368 tsleep(&recon_queue, PRIBIO,
1369 "raidframe recon", 0);
1370 LOCK_RECON_Q_MUTEX();
1371 }
1372 req = recon_queue;
1373 recon_queue = recon_queue->next;
1374 UNLOCK_RECON_Q_MUTEX();
1375
1376 /*
1377 * If flags specifies that we should start recon, this call
1378 * will not return until reconstruction completes, fails,
1379 * or is aborted.
1380 */
1381 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
1382 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
1383
1384 RF_Free(req, sizeof(*req));
1385 }
1386 }
1387 /* wake up the daemon & tell it to get us a spare table
1388 * XXX
1389 * the entries in the queues should be tagged with the raidPtr
1390 * so that in the extremely rare case that two recons happen at once,
1391 * we know for which device were requesting a spare table
1392 * XXX
1393 */
1394 int
1395 rf_GetSpareTableFromDaemon(req)
1396 RF_SparetWait_t *req;
1397 {
1398 int retcode;
1399
1400 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1401 req->next = rf_sparet_wait_queue;
1402 rf_sparet_wait_queue = req;
1403 wakeup(&rf_sparet_wait_queue);
1404
1405 /* mpsleep unlocks the mutex */
1406 while (!rf_sparet_resp_queue) {
1407 tsleep(&rf_sparet_resp_queue, PRIBIO,
1408 "raidframe getsparetable", 0);
1409 #if 0
1410 mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
1411 (void *) simple_lock_addr(rf_sparet_wait_mutex),
1412 MS_LOCK_SIMPLE);
1413 #endif
1414 }
1415 req = rf_sparet_resp_queue;
1416 rf_sparet_resp_queue = req->next;
1417 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1418
1419 retcode = req->fcol;
1420 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1421 * alloc'd */
1422 return (retcode);
1423 }
1424 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1425 * bp & passes it down.
1426 * any calls originating in the kernel must use non-blocking I/O
1427 * do some extra sanity checking to return "appropriate" error values for
1428 * certain conditions (to make some standard utilities work)
1429 */
1430 int
1431 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
1432 RF_Raid_t *raidPtr;
1433 struct buf *bp;
1434 RF_RaidAccessFlags_t flags;
1435 void (*cbFunc) (struct buf *);
1436 void *cbArg;
1437 {
1438 RF_SectorCount_t num_blocks, pb, sum;
1439 RF_RaidAddr_t raid_addr;
1440 int retcode;
1441 struct partition *pp;
1442 daddr_t blocknum;
1443 int unit;
1444 struct raid_softc *rs;
1445 int do_async;
1446
1447 /* XXX The dev_t used here should be for /dev/[r]raid* !!! */
1448
1449 unit = raidPtr->raidid;
1450 rs = &raid_softc[unit];
1451
1452 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1453 * partition.. Need to make it absolute to the underlying device.. */
1454
1455 blocknum = bp->b_blkno;
1456 if (DISKPART(bp->b_dev) != RAW_PART) {
1457 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1458 blocknum += pp->p_offset;
1459 db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
1460 pp->p_offset));
1461 } else {
1462 db1_printf(("Is raw..\n"));
1463 }
1464 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
1465
1466 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1467 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1468
1469 /* *THIS* is where we adjust what block we're going to... but DO NOT
1470 * TOUCH bp->b_blkno!!! */
1471 raid_addr = blocknum;
1472
1473 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1474 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1475 sum = raid_addr + num_blocks + pb;
1476 if (1 || rf_debugKernelAccess) {
1477 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1478 (int) raid_addr, (int) sum, (int) num_blocks,
1479 (int) pb, (int) bp->b_resid));
1480 }
1481 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1482 || (sum < num_blocks) || (sum < pb)) {
1483 bp->b_error = ENOSPC;
1484 bp->b_flags |= B_ERROR;
1485 bp->b_resid = bp->b_bcount;
1486 biodone(bp);
1487 return (bp->b_error);
1488 }
1489 /*
1490 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1491 */
1492
1493 if (bp->b_bcount & raidPtr->sectorMask) {
1494 bp->b_error = EINVAL;
1495 bp->b_flags |= B_ERROR;
1496 bp->b_resid = bp->b_bcount;
1497 biodone(bp);
1498 return (bp->b_error);
1499 }
1500 db1_printf(("Calling DoAccess..\n"));
1501
1502
1503 /* Put a throttle on the number of requests we handle simultanously */
1504
1505 RF_LOCK_MUTEX(raidPtr->mutex);
1506
1507 while(raidPtr->openings <= 0) {
1508 RF_UNLOCK_MUTEX(raidPtr->mutex);
1509 (void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
1510 RF_LOCK_MUTEX(raidPtr->mutex);
1511 }
1512 raidPtr->openings--;
1513
1514 RF_UNLOCK_MUTEX(raidPtr->mutex);
1515
1516 /*
1517 * Everything is async.
1518 */
1519 do_async = 1;
1520
1521 /* don't ever condition on bp->b_flags & B_WRITE. always condition on
1522 * B_READ instead */
1523 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1524 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1525 do_async, raid_addr, num_blocks,
1526 bp->b_un.b_addr,
1527 bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
1528 NULL, cbFunc, cbArg);
1529 #if 0
1530 db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
1531 bp->b_data, (int) bp->b_resid));
1532 #endif
1533
1534 return (retcode);
1535 }
1536 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1537
1538 int
1539 rf_DispatchKernelIO(queue, req)
1540 RF_DiskQueue_t *queue;
1541 RF_DiskQueueData_t *req;
1542 {
1543 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1544 struct buf *bp;
1545 struct raidbuf *raidbp = NULL;
1546 struct raid_softc *rs;
1547 int unit;
1548
1549 /* XXX along with the vnode, we also need the softc associated with
1550 * this device.. */
1551
1552 req->queue = queue;
1553
1554 unit = queue->raidPtr->raidid;
1555
1556 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1557
1558 if (unit >= numraid) {
1559 printf("Invalid unit number: %d %d\n", unit, numraid);
1560 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1561 }
1562 rs = &raid_softc[unit];
1563
1564 /* XXX is this the right place? */
1565 disk_busy(&rs->sc_dkdev);
1566
1567 bp = req->bp;
1568 #if 1
1569 /* XXX when there is a physical disk failure, someone is passing us a
1570 * buffer that contains old stuff!! Attempt to deal with this problem
1571 * without taking a performance hit... (not sure where the real bug
1572 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1573
1574 if (bp->b_flags & B_ERROR) {
1575 bp->b_flags &= ~B_ERROR;
1576 }
1577 if (bp->b_error != 0) {
1578 bp->b_error = 0;
1579 }
1580 #endif
1581 raidbp = RAIDGETBUF(rs);
1582
1583 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1584
1585 /*
1586 * context for raidiodone
1587 */
1588 raidbp->rf_obp = bp;
1589 raidbp->req = req;
1590
1591 switch (req->type) {
1592 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1593 /* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
1594 * queue->row, queue->col); */
1595 /* XXX need to do something extra here.. */
1596 /* I'm leaving this in, as I've never actually seen it used,
1597 * and I'd like folks to report it... GO */
1598 printf(("WAKEUP CALLED\n"));
1599 queue->numOutstanding++;
1600
1601 /* XXX need to glue the original buffer into this?? */
1602
1603 KernelWakeupFunc(&raidbp->rf_buf);
1604 break;
1605
1606 case RF_IO_TYPE_READ:
1607 case RF_IO_TYPE_WRITE:
1608
1609 if (req->tracerec) {
1610 RF_ETIMER_START(req->tracerec->timer);
1611 }
1612 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1613 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1614 req->sectorOffset, req->numSector,
1615 req->buf, KernelWakeupFunc, (void *) req,
1616 queue->raidPtr->logBytesPerSector, req->b_proc);
1617
1618 if (rf_debugKernelAccess) {
1619 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1620 (long) bp->b_blkno));
1621 }
1622 queue->numOutstanding++;
1623 queue->last_deq_sector = req->sectorOffset;
1624 /* acc wouldn't have been let in if there were any pending
1625 * reqs at any other priority */
1626 queue->curPriority = req->priority;
1627 /* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
1628 * req->type, queue->row, queue->col); */
1629
1630 db1_printf(("Going for %c to unit %d row %d col %d\n",
1631 req->type, unit, queue->row, queue->col));
1632 db1_printf(("sector %d count %d (%d bytes) %d\n",
1633 (int) req->sectorOffset, (int) req->numSector,
1634 (int) (req->numSector <<
1635 queue->raidPtr->logBytesPerSector),
1636 (int) queue->raidPtr->logBytesPerSector));
1637 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1638 raidbp->rf_buf.b_vp->v_numoutput++;
1639 }
1640 VOP_STRATEGY(&raidbp->rf_buf);
1641
1642 break;
1643
1644 default:
1645 panic("bad req->type in rf_DispatchKernelIO");
1646 }
1647 db1_printf(("Exiting from DispatchKernelIO\n"));
1648 return (0);
1649 }
1650 /* this is the callback function associated with a I/O invoked from
1651 kernel code.
1652 */
1653 static void
1654 KernelWakeupFunc(vbp)
1655 struct buf *vbp;
1656 {
1657 RF_DiskQueueData_t *req = NULL;
1658 RF_DiskQueue_t *queue;
1659 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1660 struct buf *bp;
1661 struct raid_softc *rs;
1662 int unit;
1663 register int s;
1664
1665 s = splbio(); /* XXX */
1666 db1_printf(("recovering the request queue:\n"));
1667 req = raidbp->req;
1668
1669 bp = raidbp->rf_obp;
1670 #if 0
1671 db1_printf(("bp=0x%x\n", bp));
1672 #endif
1673
1674 queue = (RF_DiskQueue_t *) req->queue;
1675
1676 if (raidbp->rf_buf.b_flags & B_ERROR) {
1677 #if 0
1678 printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
1679 #endif
1680 bp->b_flags |= B_ERROR;
1681 bp->b_error = raidbp->rf_buf.b_error ?
1682 raidbp->rf_buf.b_error : EIO;
1683 }
1684 #if 0
1685 db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
1686 db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
1687 db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
1688 db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
1689 #endif
1690
1691 /* XXX methinks this could be wrong... */
1692 #if 1
1693 bp->b_resid = raidbp->rf_buf.b_resid;
1694 #endif
1695
1696 if (req->tracerec) {
1697 RF_ETIMER_STOP(req->tracerec->timer);
1698 RF_ETIMER_EVAL(req->tracerec->timer);
1699 RF_LOCK_MUTEX(rf_tracing_mutex);
1700 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1701 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1702 req->tracerec->num_phys_ios++;
1703 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1704 }
1705 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1706
1707 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1708
1709
1710 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1711 * ballistic, and mark the component as hosed... */
1712 #if 1
1713 if (bp->b_flags & B_ERROR) {
1714 /* Mark the disk as dead */
1715 /* but only mark it once... */
1716 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1717 rf_ds_optimal) {
1718 printf("raid%d: IO Error. Marking %s as failed.\n",
1719 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1720 queue->raidPtr->Disks[queue->row][queue->col].status =
1721 rf_ds_failed;
1722 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1723 queue->raidPtr->numFailures++;
1724 /* XXX here we should bump the version number for each component, and write that data out */
1725 } else { /* Disk is already dead... */
1726 /* printf("Disk already marked as dead!\n"); */
1727 }
1728
1729 }
1730 #endif
1731
1732 rs = &raid_softc[unit];
1733 RAIDPUTBUF(rs, raidbp);
1734
1735
1736 if (bp->b_resid == 0) {
1737 db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
1738 unit, bp->b_resid, bp->b_bcount));
1739 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1740 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1741 } else {
1742 db1_printf(("b_resid is still %ld\n", bp->b_resid));
1743 }
1744
1745 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1746 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1747 /* printf("Exiting KernelWakeupFunc\n"); */
1748
1749 splx(s); /* XXX */
1750 }
1751
1752
1753
1754 /*
1755 * initialize a buf structure for doing an I/O in the kernel.
1756 */
1757 static void
1758 InitBP(
1759 struct buf * bp,
1760 struct vnode * b_vp,
1761 unsigned rw_flag,
1762 dev_t dev,
1763 RF_SectorNum_t startSect,
1764 RF_SectorCount_t numSect,
1765 caddr_t buf,
1766 void (*cbFunc) (struct buf *),
1767 void *cbArg,
1768 int logBytesPerSector,
1769 struct proc * b_proc)
1770 {
1771 /* bp->b_flags = B_PHYS | rw_flag; */
1772 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1773 bp->b_bcount = numSect << logBytesPerSector;
1774 bp->b_bufsize = bp->b_bcount;
1775 bp->b_error = 0;
1776 bp->b_dev = dev;
1777 db1_printf(("bp->b_dev is %d\n", dev));
1778 bp->b_un.b_addr = buf;
1779 #if 0
1780 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1781 #endif
1782
1783 bp->b_blkno = startSect;
1784 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1785 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1786 if (bp->b_bcount == 0) {
1787 panic("bp->b_bcount is zero in InitBP!!\n");
1788 }
1789 bp->b_proc = b_proc;
1790 bp->b_iodone = cbFunc;
1791 bp->b_vp = b_vp;
1792
1793 }
/* Extras... */

/*
 * rpcc --
 *	Placeholder.  XXX the intended purpose is not known; the guess is
 *	that it should read the CPU cycle counter.  As written it reads
 *	nothing and always returns 0.
 */
unsigned int
rpcc()
{
	const unsigned int cycles = 0;	/* no counter is consulted */

	return (cycles);
}
#if 0
/*
 * Disabled placeholder for rf_GetSpareTableFromDaemon(); the definition
 * that is actually compiled appears earlier in this file.  The stub only
 * prints a reminder and reports 1.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (1);
}
#endif
1816
1817 static void
1818 raidgetdefaultlabel(raidPtr, rs, lp)
1819 RF_Raid_t *raidPtr;
1820 struct raid_softc *rs;
1821 struct disklabel *lp;
1822 {
1823 db1_printf(("Building a default label...\n"));
1824 bzero(lp, sizeof(*lp));
1825
1826 /* fabricate a label... */
1827 lp->d_secperunit = raidPtr->totalSectors;
1828 lp->d_secsize = raidPtr->bytesPerSector;
1829 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1830 lp->d_ntracks = 1;
1831 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1832 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1833
1834 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1835 lp->d_type = DTYPE_RAID;
1836 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1837 lp->d_rpm = 3600;
1838 lp->d_interleave = 1;
1839 lp->d_flags = 0;
1840
1841 lp->d_partitions[RAW_PART].p_offset = 0;
1842 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1843 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1844 lp->d_npartitions = RAW_PART + 1;
1845
1846 lp->d_magic = DISKMAGIC;
1847 lp->d_magic2 = DISKMAGIC;
1848 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1849
1850 }
1851 /*
1852 * Read the disklabel from the raid device. If one is not present, fake one
1853 * up.
1854 */
1855 static void
1856 raidgetdisklabel(dev)
1857 dev_t dev;
1858 {
1859 int unit = raidunit(dev);
1860 struct raid_softc *rs = &raid_softc[unit];
1861 char *errstring;
1862 struct disklabel *lp = rs->sc_dkdev.dk_label;
1863 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1864 RF_Raid_t *raidPtr;
1865
1866 db1_printf(("Getting the disklabel...\n"));
1867
1868 bzero(clp, sizeof(*clp));
1869
1870 raidPtr = raidPtrs[unit];
1871
1872 raidgetdefaultlabel(raidPtr, rs, lp);
1873
1874 /*
1875 * Call the generic disklabel extraction routine.
1876 */
1877 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1878 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1879 if (errstring)
1880 raidmakedisklabel(rs);
1881 else {
1882 int i;
1883 struct partition *pp;
1884
1885 /*
1886 * Sanity check whether the found disklabel is valid.
1887 *
1888 * This is necessary since total size of the raid device
1889 * may vary when an interleave is changed even though exactly
1890 * same componets are used, and old disklabel may used
1891 * if that is found.
1892 */
1893 if (lp->d_secperunit != rs->sc_size)
1894 printf("WARNING: %s: "
1895 "total sector size in disklabel (%d) != "
1896 "the size of raid (%ld)\n", rs->sc_xname,
1897 lp->d_secperunit, (long) rs->sc_size);
1898 for (i = 0; i < lp->d_npartitions; i++) {
1899 pp = &lp->d_partitions[i];
1900 if (pp->p_offset + pp->p_size > rs->sc_size)
1901 printf("WARNING: %s: end of partition `%c' "
1902 "exceeds the size of raid (%ld)\n",
1903 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1904 }
1905 }
1906
1907 }
1908 /*
1909 * Take care of things one might want to take care of in the event
1910 * that a disklabel isn't present.
1911 */
1912 static void
1913 raidmakedisklabel(rs)
1914 struct raid_softc *rs;
1915 {
1916 struct disklabel *lp = rs->sc_dkdev.dk_label;
1917 db1_printf(("Making a label..\n"));
1918
1919 /*
1920 * For historical reasons, if there's no disklabel present
1921 * the raw partition must be marked FS_BSDFFS.
1922 */
1923
1924 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1925
1926 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1927
1928 lp->d_checksum = dkcksum(lp);
1929 }
1930 /*
1931 * Lookup the provided name in the filesystem. If the file exists,
1932 * is a valid block device, and isn't being used by anyone else,
1933 * set *vpp to the file's vnode.
1934 * You'll find the original of this in ccd.c
1935 */
1936 int
1937 raidlookup(path, p, vpp)
1938 char *path;
1939 struct proc *p;
1940 struct vnode **vpp; /* result */
1941 {
1942 struct nameidata nd;
1943 struct vnode *vp;
1944 struct vattr va;
1945 int error;
1946
1947 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1948 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1949 #ifdef DEBUG
1950 printf("RAIDframe: vn_open returned %d\n", error);
1951 #endif
1952 return (error);
1953 }
1954 vp = nd.ni_vp;
1955 if (vp->v_usecount > 1) {
1956 VOP_UNLOCK(vp, 0);
1957 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1958 return (EBUSY);
1959 }
1960 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1961 VOP_UNLOCK(vp, 0);
1962 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1963 return (error);
1964 }
1965 /* XXX: eventually we should handle VREG, too. */
1966 if (va.va_type != VBLK) {
1967 VOP_UNLOCK(vp, 0);
1968 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1969 return (ENOTBLK);
1970 }
1971 VOP_UNLOCK(vp, 0);
1972 *vpp = vp;
1973 return (0);
1974 }
1975 /*
1976 * Wait interruptibly for an exclusive lock.
1977 *
1978 * XXX
1979 * Several drivers do this; it should be abstracted and made MP-safe.
1980 * (Hmm... where have we seen this warning before :-> GO )
1981 */
1982 static int
1983 raidlock(rs)
1984 struct raid_softc *rs;
1985 {
1986 int error;
1987
1988 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1989 rs->sc_flags |= RAIDF_WANTED;
1990 if ((error =
1991 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1992 return (error);
1993 }
1994 rs->sc_flags |= RAIDF_LOCKED;
1995 return (0);
1996 }
1997 /*
1998 * Unlock and wake up any waiters.
1999 */
2000 static void
2001 raidunlock(rs)
2002 struct raid_softc *rs;
2003 {
2004
2005 rs->sc_flags &= ~RAIDF_LOCKED;
2006 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2007 rs->sc_flags &= ~RAIDF_WANTED;
2008 wakeup(rs);
2009 }
2010 }
2011
2012
2013 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2014 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2015
2016 int
2017 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2018 {
2019 RF_ComponentLabel_t component_label;
2020 raidread_component_label(dev, b_vp, &component_label);
2021 component_label.mod_counter = mod_counter;
2022 component_label.clean = RF_RAID_CLEAN;
2023 raidwrite_component_label(dev, b_vp, &component_label);
2024 return(0);
2025 }
2026
2027
2028 int
2029 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2030 {
2031 RF_ComponentLabel_t component_label;
2032 raidread_component_label(dev, b_vp, &component_label);
2033 component_label.mod_counter = mod_counter;
2034 component_label.clean = RF_RAID_DIRTY;
2035 raidwrite_component_label(dev, b_vp, &component_label);
2036 return(0);
2037 }
2038
2039 /* ARGSUSED */
2040 int
2041 raidread_component_label(dev, b_vp, component_label)
2042 dev_t dev;
2043 struct vnode *b_vp;
2044 RF_ComponentLabel_t *component_label;
2045 {
2046 struct buf *bp;
2047 int error;
2048
2049 /* XXX should probably ensure that we don't try to do this if
2050 someone has changed rf_protected_sectors. */
2051
2052 /* get a block of the appropriate size... */
2053 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2054 bp->b_dev = dev;
2055
2056 /* get our ducks in a row for the read */
2057 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2058 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2059 bp->b_flags = B_BUSY | B_READ;
2060 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2061
2062 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2063
2064 error = biowait(bp);
2065
2066 if (!error) {
2067 memcpy(component_label, bp->b_un.b_addr,
2068 sizeof(RF_ComponentLabel_t));
2069 #if 0
2070 printf("raidread_component_label: got component label:\n");
2071 printf("Version: %d\n",component_label->version);
2072 printf("Serial Number: %d\n",component_label->serial_number);
2073 printf("Mod counter: %d\n",component_label->mod_counter);
2074 printf("Row: %d\n", component_label->row);
2075 printf("Column: %d\n", component_label->column);
2076 printf("Num Rows: %d\n", component_label->num_rows);
2077 printf("Num Columns: %d\n", component_label->num_columns);
2078 printf("Clean: %d\n", component_label->clean);
2079 printf("Status: %d\n", component_label->status);
2080 #endif
2081 } else {
2082 printf("Failed to read RAID component label!\n");
2083 }
2084
2085 bp->b_flags = B_INVAL | B_AGE;
2086 brelse(bp);
2087 return(error);
2088 }
2089 /* ARGSUSED */
2090 int
2091 raidwrite_component_label(dev, b_vp, component_label)
2092 dev_t dev;
2093 struct vnode *b_vp;
2094 RF_ComponentLabel_t *component_label;
2095 {
2096 struct buf *bp;
2097 int error;
2098
2099 /* get a block of the appropriate size... */
2100 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2101 bp->b_dev = dev;
2102
2103 /* get our ducks in a row for the write */
2104 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2105 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2106 bp->b_flags = B_BUSY | B_WRITE;
2107 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2108
2109 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2110
2111 memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
2112
2113 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2114 error = biowait(bp);
2115 bp->b_flags = B_INVAL | B_AGE;
2116 brelse(bp);
2117 if (error) {
2118 printf("Failed to write RAID component info!\n");
2119 }
2120
2121 return(error);
2122 }
2123
2124 void
2125 rf_markalldirty( raidPtr )
2126 RF_Raid_t *raidPtr;
2127 {
2128 RF_ComponentLabel_t c_label;
2129 int r,c;
2130
2131 raidPtr->mod_counter++;
2132 for (r = 0; r < raidPtr->numRow; r++) {
2133 for (c = 0; c < raidPtr->numCol; c++) {
2134 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2135 raidread_component_label(
2136 raidPtr->Disks[r][c].dev,
2137 raidPtr->raid_cinfo[r][c].ci_vp,
2138 &c_label);
2139 if (c_label.status == rf_ds_spared) {
2140 /* XXX do something special...
2141 but whatever you do, don't
2142 try to access it!! */
2143 } else {
2144 #if 0
2145 c_label.status =
2146 raidPtr->Disks[r][c].status;
2147 raidwrite_component_label(
2148 raidPtr->Disks[r][c].dev,
2149 raidPtr->raid_cinfo[r][c].ci_vp,
2150 &c_label);
2151 #endif
2152 raidmarkdirty(
2153 raidPtr->Disks[r][c].dev,
2154 raidPtr->raid_cinfo[r][c].ci_vp,
2155 raidPtr->mod_counter);
2156 }
2157 }
2158 }
2159 }
2160 /* printf("Component labels marked dirty.\n"); */
2161 #if 0
2162 for( c = 0; c < raidPtr->numSpare ; c++) {
2163 sparecol = raidPtr->numCol + c;
2164 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2165 /*
2166
2167 XXX this is where we get fancy and map this spare
2168 into it's correct spot in the array.
2169
2170 */
2171 /*
2172
2173 we claim this disk is "optimal" if it's
2174 rf_ds_used_spare, as that means it should be
2175 directly substitutable for the disk it replaced.
2176 We note that too...
2177
2178 */
2179
2180 for(i=0;i<raidPtr->numRow;i++) {
2181 for(j=0;j<raidPtr->numCol;j++) {
2182 if ((raidPtr->Disks[i][j].spareRow ==
2183 r) &&
2184 (raidPtr->Disks[i][j].spareCol ==
2185 sparecol)) {
2186 srow = r;
2187 scol = sparecol;
2188 break;
2189 }
2190 }
2191 }
2192
2193 raidread_component_label(
2194 raidPtr->Disks[r][sparecol].dev,
2195 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2196 &c_label);
2197 /* make sure status is noted */
2198 c_label.version = RF_COMPONENT_LABEL_VERSION;
2199 c_label.mod_counter = raidPtr->mod_counter;
2200 c_label.serial_number = raidPtr->serial_number;
2201 c_label.row = srow;
2202 c_label.column = scol;
2203 c_label.num_rows = raidPtr->numRow;
2204 c_label.num_columns = raidPtr->numCol;
2205 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2206 c_label.status = rf_ds_optimal;
2207 raidwrite_component_label(
2208 raidPtr->Disks[r][sparecol].dev,
2209 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2210 &c_label);
2211 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2212 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2213 }
2214 }
2215
2216 #endif
2217 }
2218
2219
2220 void
2221 rf_update_component_labels( raidPtr )
2222 RF_Raid_t *raidPtr;
2223 {
2224 RF_ComponentLabel_t c_label;
2225 int sparecol;
2226 int r,c;
2227 int i,j;
2228 int srow, scol;
2229
2230 srow = -1;
2231 scol = -1;
2232
2233 /* XXX should do extra checks to make sure things really are clean,
2234 rather than blindly setting the clean bit... */
2235
2236 raidPtr->mod_counter++;
2237
2238 for (r = 0; r < raidPtr->numRow; r++) {
2239 for (c = 0; c < raidPtr->numCol; c++) {
2240 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2241 raidread_component_label(
2242 raidPtr->Disks[r][c].dev,
2243 raidPtr->raid_cinfo[r][c].ci_vp,
2244 &c_label);
2245 /* make sure status is noted */
2246 c_label.status = rf_ds_optimal;
2247 raidwrite_component_label(
2248 raidPtr->Disks[r][c].dev,
2249 raidPtr->raid_cinfo[r][c].ci_vp,
2250 &c_label);
2251 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2252 raidmarkclean(
2253 raidPtr->Disks[r][c].dev,
2254 raidPtr->raid_cinfo[r][c].ci_vp,
2255 raidPtr->mod_counter);
2256 }
2257 }
2258 /* else we don't touch it.. */
2259 #if 0
2260 else if (raidPtr->Disks[r][c].status !=
2261 rf_ds_failed) {
2262 raidread_component_label(
2263 raidPtr->Disks[r][c].dev,
2264 raidPtr->raid_cinfo[r][c].ci_vp,
2265 &c_label);
2266 /* make sure status is noted */
2267 c_label.status =
2268 raidPtr->Disks[r][c].status;
2269 raidwrite_component_label(
2270 raidPtr->Disks[r][c].dev,
2271 raidPtr->raid_cinfo[r][c].ci_vp,
2272 &c_label);
2273 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2274 raidmarkclean(
2275 raidPtr->Disks[r][c].dev,
2276 raidPtr->raid_cinfo[r][c].ci_vp,
2277 raidPtr->mod_counter);
2278 }
2279 }
2280 #endif
2281 }
2282 }
2283
2284 for( c = 0; c < raidPtr->numSpare ; c++) {
2285 sparecol = raidPtr->numCol + c;
2286 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2287 /*
2288
2289 we claim this disk is "optimal" if it's
2290 rf_ds_used_spare, as that means it should be
2291 directly substitutable for the disk it replaced.
2292 We note that too...
2293
2294 */
2295
2296 for(i=0;i<raidPtr->numRow;i++) {
2297 for(j=0;j<raidPtr->numCol;j++) {
2298 if ((raidPtr->Disks[i][j].spareRow ==
2299 0) &&
2300 (raidPtr->Disks[i][j].spareCol ==
2301 sparecol)) {
2302 srow = i;
2303 scol = j;
2304 break;
2305 }
2306 }
2307 }
2308
2309 raidread_component_label(
2310 raidPtr->Disks[0][sparecol].dev,
2311 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2312 &c_label);
2313 /* make sure status is noted */
2314 c_label.version = RF_COMPONENT_LABEL_VERSION;
2315 c_label.mod_counter = raidPtr->mod_counter;
2316 c_label.serial_number = raidPtr->serial_number;
2317 c_label.row = srow;
2318 c_label.column = scol;
2319 c_label.num_rows = raidPtr->numRow;
2320 c_label.num_columns = raidPtr->numCol;
2321 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2322 c_label.status = rf_ds_optimal;
2323 raidwrite_component_label(
2324 raidPtr->Disks[0][sparecol].dev,
2325 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2326 &c_label);
2327 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2328 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2329 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2330 raidPtr->mod_counter);
2331 }
2332 }
2333 }
2334 /* printf("Component labels updated\n"); */
2335 }
2336