rf_netbsdkintf.c revision 1.27 1 /* $NetBSD: rf_netbsdkintf.c,v 1.27 1999/08/14 03:10:03 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160
161 #ifdef DEBUG
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
/*
 * Per-component I/O carrier, pilfered from ccd.c's ccdbuf: wraps the
 * struct buf that is actually issued to a component and links it back
 * to the original caller's buf and the RAIDframe request it serves.
 * Instances come from the unit's sc_cbufpool via RAIDGETBUF()/RAIDPUTBUF().
 */
238 struct raidbuf {
239 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
240 	struct buf *rf_obp;	/* ptr. to original I/O buf */
241 	int rf_flags;	/* misc. flags */
242 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
243 };
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that... */
251
/*
 * Per-unit driver state ("softc").  raid_softc[] is allocated with
 * numraid entries in raidattach(); units are addressed by raidunit(dev).
 */
252 struct raid_softc {
253 	int sc_flags;	/* flags */
254 	int sc_cflags;	/* configuration flags */
255 	size_t sc_size;	/* size of the raid device */
256 	dev_t sc_dev;	/* our device.. */
257 	char sc_xname[20];	/* XXX external name */
258 	struct disk sc_dkdev;	/* generic disk device info */
259 	struct pool sc_cbufpool;	/* component buffer pool */
260 };
261 /* sc_flags */
262 #define RAIDF_INITED 0x01 /* unit has been initialized */
263 #define RAIDF_WLABEL 0x02 /* label area is writable */
264 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
265 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
266 #define RAIDF_LOCKED 0x80 /* unit is locked */
267
268 #define raidunit(x) DISKUNIT(x)
269 static int numraid = 0;
270
271 /*
272 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
273 * Be aware that large numbers can allow the driver to consume a lot of
274 * kernel memory, especially on writes...
275 */
276
277 #ifndef RAIDOUTSTANDING
278 #define RAIDOUTSTANDING 10
279 #endif
280
281 #define RAIDLABELDEV(dev) \
282 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
283
284 /* declared here, and made public, for the benefit of KVM stuff.. */
285 struct raid_softc *raid_softc;
286
287 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
288 struct disklabel *));
289 static void raidgetdisklabel __P((dev_t));
290 static void raidmakedisklabel __P((struct raid_softc *));
291
292 static int raidlock __P((struct raid_softc *));
293 static void raidunlock __P((struct raid_softc *));
294 int raidlookup __P((char *, struct proc * p, struct vnode **));
295
296 static void rf_markalldirty __P((RF_Raid_t *));
297
298 void
299 raidattach(num)
300 int num;
301 {
302 int raidID;
303 int i, rc;
304
305 #ifdef DEBUG
306 printf("raidattach: Asked for %d units\n", num);
307 #endif
308
309 if (num <= 0) {
310 #ifdef DIAGNOSTIC
311 panic("raidattach: count <= 0");
312 #endif
313 return;
314 }
315 /* This is where all the initialization stuff gets done. */
316
317 /* Make some space for requested number of units... */
318
319 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
320 if (raidPtrs == NULL) {
321 panic("raidPtrs is NULL!!\n");
322 }
323
324 rc = rf_mutex_init(&rf_sparet_wait_mutex);
325 if (rc) {
326 RF_PANIC();
327 }
328
329 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
330 recon_queue = NULL;
331
332 for (i = 0; i < numraid; i++)
333 raidPtrs[i] = NULL;
334 rc = rf_BootRaidframe();
335 if (rc == 0)
336 printf("Kernelized RAIDframe activated\n");
337 else
338 panic("Serious error booting RAID!!\n");
339
340 rf_kbooted = RFK_BOOT_GOOD;
341
342 /* put together some datastructures like the CCD device does.. This
343 * lets us lock the device and what-not when it gets opened. */
344
345 raid_softc = (struct raid_softc *)
346 malloc(num * sizeof(struct raid_softc),
347 M_RAIDFRAME, M_NOWAIT);
348 if (raid_softc == NULL) {
349 printf("WARNING: no memory for RAIDframe driver\n");
350 return;
351 }
352 numraid = num;
353 bzero(raid_softc, num * sizeof(struct raid_softc));
354
355 for (raidID = 0; raidID < num; raidID++) {
356 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
357 (RF_Raid_t *));
358 if (raidPtrs[raidID] == NULL) {
359 printf("raidPtrs[%d] is NULL\n", raidID);
360 }
361 }
362 }
363
364
365 int
366 raidsize(dev)
367 dev_t dev;
368 {
369 struct raid_softc *rs;
370 struct disklabel *lp;
371 int part, unit, omask, size;
372
373 unit = raidunit(dev);
374 if (unit >= numraid)
375 return (-1);
376 rs = &raid_softc[unit];
377
378 if ((rs->sc_flags & RAIDF_INITED) == 0)
379 return (-1);
380
381 part = DISKPART(dev);
382 omask = rs->sc_dkdev.dk_openmask & (1 << part);
383 lp = rs->sc_dkdev.dk_label;
384
385 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
386 return (-1);
387
388 if (lp->d_partitions[part].p_fstype != FS_SWAP)
389 size = -1;
390 else
391 size = lp->d_partitions[part].p_size *
392 (lp->d_secsize / DEV_BSIZE);
393
394 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
395 return (-1);
396
397 return (size);
398
399 }
400
401 int
402 raiddump(dev, blkno, va, size)
403 dev_t dev;
404 daddr_t blkno;
405 caddr_t va;
406 size_t size;
407 {
408 /* Not implemented. */
409 return ENXIO;
410 }
411 /* ARGSUSED */
412 int
413 raidopen(dev, flags, fmt, p)
414 dev_t dev;
415 int flags, fmt;
416 struct proc *p;
417 {
418 int unit = raidunit(dev);
419 struct raid_softc *rs;
420 struct disklabel *lp;
421 int part, pmask;
422 int error = 0;
423
424 if (unit >= numraid)
425 return (ENXIO);
426 rs = &raid_softc[unit];
427
428 if ((error = raidlock(rs)) != 0)
429 return (error);
430 lp = rs->sc_dkdev.dk_label;
431
432 part = DISKPART(dev);
433 pmask = (1 << part);
434
435 db1_printf(("Opening raid device number: %d partition: %d\n",
436 unit, part));
437
438
439 if ((rs->sc_flags & RAIDF_INITED) &&
440 (rs->sc_dkdev.dk_openmask == 0))
441 raidgetdisklabel(dev);
442
443 /* make sure that this partition exists */
444
445 if (part != RAW_PART) {
446 db1_printf(("Not a raw partition..\n"));
447 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
448 ((part >= lp->d_npartitions) ||
449 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
450 error = ENXIO;
451 raidunlock(rs);
452 db1_printf(("Bailing out...\n"));
453 return (error);
454 }
455 }
456 /* Prevent this unit from being unconfigured while open. */
457 switch (fmt) {
458 case S_IFCHR:
459 rs->sc_dkdev.dk_copenmask |= pmask;
460 break;
461
462 case S_IFBLK:
463 rs->sc_dkdev.dk_bopenmask |= pmask;
464 break;
465 }
466
467 if ((rs->sc_dkdev.dk_openmask == 0) &&
468 ((rs->sc_flags & RAIDF_INITED) != 0)) {
469 /* First one... mark things as dirty... Note that we *MUST*
470 have done a configure before this. I DO NOT WANT TO BE
471 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
472 THAT THEY BELONG TOGETHER!!!!! */
473 /* XXX should check to see if we're only open for reading
474 here... If so, we needn't do this, but then need some
475 other way of keeping track of what's happened.. */
476
477 rf_markalldirty( raidPtrs[unit] );
478 }
479
480
481 rs->sc_dkdev.dk_openmask =
482 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
483
484 raidunlock(rs);
485
486 return (error);
487
488
489 }
490 /* ARGSUSED */
491 int
492 raidclose(dev, flags, fmt, p)
493 dev_t dev;
494 int flags, fmt;
495 struct proc *p;
496 {
497 int unit = raidunit(dev);
498 struct raid_softc *rs;
499 int error = 0;
500 int part;
501
502 if (unit >= numraid)
503 return (ENXIO);
504 rs = &raid_softc[unit];
505
506 if ((error = raidlock(rs)) != 0)
507 return (error);
508
509 part = DISKPART(dev);
510
511 /* ...that much closer to allowing unconfiguration... */
512 switch (fmt) {
513 case S_IFCHR:
514 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
515 break;
516
517 case S_IFBLK:
518 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
519 break;
520 }
521 rs->sc_dkdev.dk_openmask =
522 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
523
524 if ((rs->sc_dkdev.dk_openmask == 0) &&
525 ((rs->sc_flags & RAIDF_INITED) != 0)) {
526 /* Last one... device is not unconfigured yet.
527 Device shutdown has taken care of setting the
528 clean bits if RAIDF_INITED is not set
529 mark things as clean... */
530 rf_update_component_labels( raidPtrs[unit] );
531 }
532
533 raidunlock(rs);
534 return (0);
535
536 }
537
538 void
539 raidstrategy(bp)
540 register struct buf *bp;
541 {
542 register int s;
543
544 unsigned int raidID = raidunit(bp->b_dev);
545 RF_Raid_t *raidPtr;
546 struct raid_softc *rs = &raid_softc[raidID];
547 struct disklabel *lp;
548 int wlabel;
549
550 #if 0
551 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
552 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
553 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
554 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
555 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
556
557 if (bp->b_flags & B_READ)
558 db1_printf(("READ\n"));
559 else
560 db1_printf(("WRITE\n"));
561 #endif
562 if (rf_kbooted != RFK_BOOT_GOOD)
563 return;
564 if (raidID >= numraid || !raidPtrs[raidID]) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 raidPtr = raidPtrs[raidID];
572 if (!raidPtr->valid) {
573 bp->b_error = ENODEV;
574 bp->b_flags |= B_ERROR;
575 bp->b_resid = bp->b_bcount;
576 biodone(bp);
577 return;
578 }
579 if (bp->b_bcount == 0) {
580 db1_printf(("b_bcount is zero..\n"));
581 biodone(bp);
582 return;
583 }
584 lp = rs->sc_dkdev.dk_label;
585
586 /*
587 * Do bounds checking and adjust transfer. If there's an
588 * error, the bounds check will flag that for us.
589 */
590
591 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
592 if (DISKPART(bp->b_dev) != RAW_PART)
593 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
594 db1_printf(("Bounds check failed!!:%d %d\n",
595 (int) bp->b_blkno, (int) wlabel));
596 biodone(bp);
597 return;
598 }
599 s = splbio(); /* XXX Needed? */
600 db1_printf(("Beginning strategy...\n"));
601
602 bp->b_resid = 0;
603 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
604 NULL, NULL, NULL);
605 if (bp->b_error) {
606 bp->b_flags |= B_ERROR;
607 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
608 bp->b_error));
609 }
610 splx(s);
611 #if 0
612 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
613 bp, bp->b_data,
614 (int) bp->b_bcount, (int) bp->b_resid));
615 #endif
616 }
617 /* ARGSUSED */
618 int
619 raidread(dev, uio, flags)
620 dev_t dev;
621 struct uio *uio;
622 int flags;
623 {
624 int unit = raidunit(dev);
625 struct raid_softc *rs;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((rs->sc_flags & RAIDF_INITED) == 0)
633 return (ENXIO);
634 part = DISKPART(dev);
635
636 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
637
638 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
639
640 }
641 /* ARGSUSED */
642 int
643 raidwrite(dev, uio, flags)
644 dev_t dev;
645 struct uio *uio;
646 int flags;
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650
651 if (unit >= numraid)
652 return (ENXIO);
653 rs = &raid_softc[unit];
654
655 if ((rs->sc_flags & RAIDF_INITED) == 0)
656 return (ENXIO);
657 db1_printf(("raidwrite\n"));
658 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
659
660 }
661
662 int
663 raidioctl(dev, cmd, data, flag, p)
664 dev_t dev;
665 u_long cmd;
666 caddr_t data;
667 int flag;
668 struct proc *p;
669 {
670 int unit = raidunit(dev);
671 int error = 0;
672 int part, pmask;
673 struct raid_softc *rs;
674 #if 0
675 int r, c;
676 #endif
677 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
678
679 /* struct ccdbuf *cbp; */
680 /* struct raidbuf *raidbp; */
681 RF_Config_t *k_cfg, *u_cfg;
682 u_char *specific_buf;
683 int retcode = 0;
684 int row;
685 int column;
686 int s;
687 struct rf_recon_req *rrcopy, *rr;
688 RF_ComponentLabel_t *component_label;
689 RF_ComponentLabel_t ci_label;
690 RF_ComponentLabel_t **c_label_ptr;
691 RF_SingleComponent_t *sparePtr,*componentPtr;
692 RF_SingleComponent_t hot_spare;
693 RF_SingleComponent_t component;
694
695 if (unit >= numraid)
696 return (ENXIO);
697 rs = &raid_softc[unit];
698
699 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
700 (int) DISKPART(dev), (int) unit, (int) cmd));
701
702 /* Must be open for writes for these commands... */
703 switch (cmd) {
704 case DIOCSDINFO:
705 case DIOCWDINFO:
706 case DIOCWLABEL:
707 if ((flag & FWRITE) == 0)
708 return (EBADF);
709 }
710
711 /* Must be initialized for these... */
712 switch (cmd) {
713 case DIOCGDINFO:
714 case DIOCSDINFO:
715 case DIOCWDINFO:
716 case DIOCGPART:
717 case DIOCWLABEL:
718 case DIOCGDEFLABEL:
719 case RAIDFRAME_SHUTDOWN:
720 case RAIDFRAME_REWRITEPARITY:
721 case RAIDFRAME_GET_INFO:
722 case RAIDFRAME_RESET_ACCTOTALS:
723 case RAIDFRAME_GET_ACCTOTALS:
724 case RAIDFRAME_KEEP_ACCTOTALS:
725 case RAIDFRAME_GET_SIZE:
726 case RAIDFRAME_FAIL_DISK:
727 case RAIDFRAME_COPYBACK:
728 case RAIDFRAME_CHECKRECON:
729 case RAIDFRAME_GET_COMPONENT_LABEL:
730 case RAIDFRAME_SET_COMPONENT_LABEL:
731 case RAIDFRAME_ADD_HOT_SPARE:
732 case RAIDFRAME_REMOVE_HOT_SPARE:
733 case RAIDFRAME_INIT_LABELS:
734 case RAIDFRAME_REBUILD_IN_PLACE:
735 case RAIDFRAME_CHECK_PARITY:
736 if ((rs->sc_flags & RAIDF_INITED) == 0)
737 return (ENXIO);
738 }
739
740 switch (cmd) {
741
742
743 /* configure the system */
744 case RAIDFRAME_CONFIGURE:
745
746 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
747 /* copy-in the configuration information */
748 /* data points to a pointer to the configuration structure */
749 u_cfg = *((RF_Config_t **) data);
750 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
751 if (k_cfg == NULL) {
752 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
753 return (ENOMEM);
754 }
755 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
756 sizeof(RF_Config_t));
757 if (retcode) {
758 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
759 retcode));
760 return (retcode);
761 }
762 /* allocate a buffer for the layout-specific data, and copy it
763 * in */
764 if (k_cfg->layoutSpecificSize) {
765 if (k_cfg->layoutSpecificSize > 10000) {
766 /* sanity check */
767 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
768 return (EINVAL);
769 }
770 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
771 (u_char *));
772 if (specific_buf == NULL) {
773 RF_Free(k_cfg, sizeof(RF_Config_t));
774 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
775 return (ENOMEM);
776 }
777 retcode = copyin(k_cfg->layoutSpecific,
778 (caddr_t) specific_buf,
779 k_cfg->layoutSpecificSize);
780 if (retcode) {
781 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
782 retcode));
783 return (retcode);
784 }
785 } else
786 specific_buf = NULL;
787 k_cfg->layoutSpecific = specific_buf;
788
789 /* should do some kind of sanity check on the configuration.
790 * Store the sum of all the bytes in the last byte? */
791
792 /* configure the system */
793
794 raidPtrs[unit]->raidid = unit;
795
796 retcode = rf_Configure(raidPtrs[unit], k_cfg);
797
798 /* allow this many simultaneous IO's to this RAID device */
799 raidPtrs[unit]->openings = RAIDOUTSTANDING;
800
801 if (retcode == 0) {
802 retcode = raidinit(dev, raidPtrs[unit], unit);
803 rf_markalldirty( raidPtrs[unit] );
804 }
805 /* free the buffers. No return code here. */
806 if (k_cfg->layoutSpecificSize) {
807 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
808 }
809 RF_Free(k_cfg, sizeof(RF_Config_t));
810
811 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
812 retcode));
813
814 return (retcode);
815
816 /* shutdown the system */
817 case RAIDFRAME_SHUTDOWN:
818
819 if ((error = raidlock(rs)) != 0)
820 return (error);
821
822 /*
823 * If somebody has a partition mounted, we shouldn't
824 * shutdown.
825 */
826
827 part = DISKPART(dev);
828 pmask = (1 << part);
829 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
830 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
831 (rs->sc_dkdev.dk_copenmask & pmask))) {
832 raidunlock(rs);
833 return (EBUSY);
834 }
835
836 if (rf_debugKernelAccess) {
837 printf("call shutdown\n");
838 }
839
840 retcode = rf_Shutdown(raidPtrs[unit]);
841
842 db1_printf(("Done main shutdown\n"));
843
844 pool_destroy(&rs->sc_cbufpool);
845 db1_printf(("Done freeing component buffer freelist\n"));
846
847 /* It's no longer initialized... */
848 rs->sc_flags &= ~RAIDF_INITED;
849
850 /* Detach the disk. */
851 disk_detach(&rs->sc_dkdev);
852
853 raidunlock(rs);
854
855 return (retcode);
856 case RAIDFRAME_GET_COMPONENT_LABEL:
857 c_label_ptr = (RF_ComponentLabel_t **) data;
858 /* need to read the component label for the disk indicated
859 by row,column in component_label
860 XXX need to sanity check these values!!!
861 */
862
863 /* For practice, let's get it directly fromdisk, rather
864 than from the in-core copy */
865 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
866 (RF_ComponentLabel_t *));
867 if (component_label == NULL)
868 return (ENOMEM);
869
870 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
871
872 retcode = copyin( *c_label_ptr, component_label,
873 sizeof(RF_ComponentLabel_t));
874
875 if (retcode) {
876 return(retcode);
877 }
878
879 row = component_label->row;
880 column = component_label->column;
881
882 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
883 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
884 return(EINVAL);
885 }
886
887 raidread_component_label(
888 raidPtrs[unit]->Disks[row][column].dev,
889 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
890 component_label );
891
892 retcode = copyout((caddr_t) component_label,
893 (caddr_t) *c_label_ptr,
894 sizeof(RF_ComponentLabel_t));
895 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
896 return (retcode);
897
898 case RAIDFRAME_SET_COMPONENT_LABEL:
899 component_label = (RF_ComponentLabel_t *) data;
900
901 /* XXX check the label for valid stuff... */
902 /* Note that some things *should not* get modified --
903 the user should be re-initing the labels instead of
904 trying to patch things.
905 */
906
907 printf("Got component label:\n");
908 printf("Version: %d\n",component_label->version);
909 printf("Serial Number: %d\n",component_label->serial_number);
910 printf("Mod counter: %d\n",component_label->mod_counter);
911 printf("Row: %d\n", component_label->row);
912 printf("Column: %d\n", component_label->column);
913 printf("Num Rows: %d\n", component_label->num_rows);
914 printf("Num Columns: %d\n", component_label->num_columns);
915 printf("Clean: %d\n", component_label->clean);
916 printf("Status: %d\n", component_label->status);
917
918 row = component_label->row;
919 column = component_label->column;
920
921 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
922 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
923 return(EINVAL);
924 }
925
926 /* XXX this isn't allowed to do anything for now :-) */
927 #if 0
928 raidwrite_component_label(
929 raidPtrs[unit]->Disks[row][column].dev,
930 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
931 component_label );
932 #endif
933 return (0);
934
935 case RAIDFRAME_INIT_LABELS:
936 component_label = (RF_ComponentLabel_t *) data;
937 /*
938 we only want the serial number from
939 the above. We get all the rest of the information
940 from the config that was used to create this RAID
941 set.
942 */
943
944 raidPtrs[unit]->serial_number = component_label->serial_number;
945 /* current version number */
946 ci_label.version = RF_COMPONENT_LABEL_VERSION;
947 ci_label.serial_number = component_label->serial_number;
948 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
949 ci_label.num_rows = raidPtrs[unit]->numRow;
950 ci_label.num_columns = raidPtrs[unit]->numCol;
951 ci_label.clean = RF_RAID_DIRTY; /* not clean */
952 ci_label.status = rf_ds_optimal; /* "It's good!" */
953
954 for(row=0;row<raidPtrs[unit]->numRow;row++) {
955 ci_label.row = row;
956 for(column=0;column<raidPtrs[unit]->numCol;column++) {
957 ci_label.column = column;
958 raidwrite_component_label(
959 raidPtrs[unit]->Disks[row][column].dev,
960 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
961 &ci_label );
962 }
963 }
964
965 return (retcode);
966
967 /* initialize all parity */
968 case RAIDFRAME_REWRITEPARITY:
969
970 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
971 /* Parity for RAID 0 is trivially correct */
972 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
973 return(0);
974 }
975
976 /* borrow the thread of the requesting process */
977
978 s = splbio();
979 retcode = rf_RewriteParity(raidPtrs[unit]);
980 splx(s);
981 /* return I/O Error if the parity rewrite fails */
982
983 if (retcode) {
984 retcode = EIO;
985 } else {
986 /* set the clean bit! If we shutdown correctly,
987 the clean bit on each component label will get
988 set */
989 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
990 }
991 return (retcode);
992
993
994 case RAIDFRAME_ADD_HOT_SPARE:
995 sparePtr = (RF_SingleComponent_t *) data;
996 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
997 printf("Adding spare\n");
998 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
999 return(retcode);
1000
1001 case RAIDFRAME_REMOVE_HOT_SPARE:
1002 return(retcode);
1003
1004 case RAIDFRAME_REBUILD_IN_PLACE:
1005
1006 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1007 /* Can't do this on a RAID 0!! */
1008 return(EINVAL);
1009 }
1010
1011 componentPtr = (RF_SingleComponent_t *) data;
1012 memcpy( &component, componentPtr,
1013 sizeof(RF_SingleComponent_t));
1014 row = component.row;
1015 column = component.column;
1016 printf("Rebuild: %d %d\n",row, column);
1017 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1018 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1019 return(EINVAL);
1020 }
1021 printf("Attempting a rebuild in place\n");
1022 s = splbio();
1023 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1024 splx(s);
1025 return(retcode);
1026
1027 /* issue a test-unit-ready through raidframe to the indicated
1028 * device */
1029 #if 0 /* XXX not supported yet (ever?) */
1030 case RAIDFRAME_TUR:
1031 /* debug only */
1032 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1033 return (retcode);
1034 #endif
1035 case RAIDFRAME_GET_INFO:
1036 {
1037 RF_Raid_t *raid = raidPtrs[unit];
1038 RF_DeviceConfig_t *cfg, **ucfgp;
1039 int i, j, d;
1040
1041 if (!raid->valid)
1042 return (ENODEV);
1043 ucfgp = (RF_DeviceConfig_t **) data;
1044 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1045 (RF_DeviceConfig_t *));
1046 if (cfg == NULL)
1047 return (ENOMEM);
1048 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1049 cfg->rows = raid->numRow;
1050 cfg->cols = raid->numCol;
1051 cfg->ndevs = raid->numRow * raid->numCol;
1052 if (cfg->ndevs >= RF_MAX_DISKS) {
1053 cfg->ndevs = 0;
1054 return (ENOMEM);
1055 }
1056 cfg->nspares = raid->numSpare;
1057 if (cfg->nspares >= RF_MAX_DISKS) {
1058 cfg->nspares = 0;
1059 return (ENOMEM);
1060 }
1061 cfg->maxqdepth = raid->maxQueueDepth;
1062 d = 0;
1063 for (i = 0; i < cfg->rows; i++) {
1064 for (j = 0; j < cfg->cols; j++) {
1065 cfg->devs[d] = raid->Disks[i][j];
1066 d++;
1067 }
1068 }
1069 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1070 cfg->spares[i] = raid->Disks[0][j];
1071 }
1072 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1073 sizeof(RF_DeviceConfig_t));
1074 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1075
1076 return (retcode);
1077 }
1078 break;
1079 case RAIDFRAME_CHECK_PARITY:
1080 *(int *) data = raidPtrs[unit]->parity_good;
1081 return (0);
1082 case RAIDFRAME_RESET_ACCTOTALS:
1083 {
1084 RF_Raid_t *raid = raidPtrs[unit];
1085
1086 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1087 return (0);
1088 }
1089 break;
1090
1091 case RAIDFRAME_GET_ACCTOTALS:
1092 {
1093 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1094 RF_Raid_t *raid = raidPtrs[unit];
1095
1096 *totals = raid->acc_totals;
1097 return (0);
1098 }
1099 break;
1100
1101 case RAIDFRAME_KEEP_ACCTOTALS:
1102 {
1103 RF_Raid_t *raid = raidPtrs[unit];
1104 int *keep = (int *) data;
1105
1106 raid->keep_acc_totals = *keep;
1107 return (0);
1108 }
1109 break;
1110
1111 case RAIDFRAME_GET_SIZE:
1112 *(int *) data = raidPtrs[unit]->totalSectors;
1113 return (0);
1114
1115 #define RAIDFRAME_RECON 1
1116 /* XXX The above should probably be set somewhere else!! GO */
1117 #if RAIDFRAME_RECON > 0
1118
1119 /* fail a disk & optionally start reconstruction */
1120 case RAIDFRAME_FAIL_DISK:
1121
1122 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1123 /* Can't do this on a RAID 0!! */
1124 return(EINVAL);
1125 }
1126
1127 rr = (struct rf_recon_req *) data;
1128
1129 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1130 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1131 return (EINVAL);
1132
1133 printf("raid%d: Failing the disk: row: %d col: %d\n",
1134 unit, rr->row, rr->col);
1135
1136 /* make a copy of the recon request so that we don't rely on
1137 * the user's buffer */
1138 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1139 bcopy(rr, rrcopy, sizeof(*rr));
1140 rrcopy->raidPtr = (void *) raidPtrs[unit];
1141
1142 LOCK_RECON_Q_MUTEX();
1143 rrcopy->next = recon_queue;
1144 recon_queue = rrcopy;
1145 wakeup(&recon_queue);
1146 UNLOCK_RECON_Q_MUTEX();
1147
1148 return (0);
1149
1150 /* invoke a copyback operation after recon on whatever disk
1151 * needs it, if any */
1152 case RAIDFRAME_COPYBACK:
1153
1154 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1155 /* This makes no sense on a RAID 0!! */
1156 return(EINVAL);
1157 }
1158
1159 /* borrow the current thread to get this done */
1160
1161 s = splbio();
1162 rf_CopybackReconstructedData(raidPtrs[unit]);
1163 splx(s);
1164 return (0);
1165
1166 /* return the percentage completion of reconstruction */
1167 case RAIDFRAME_CHECKRECON:
1168 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1169 /* This makes no sense on a RAID 0 */
1170 return(EINVAL);
1171 }
1172
1173 row = *(int *) data;
1174 if (row < 0 || row >= raidPtrs[unit]->numRow)
1175 return (EINVAL);
1176 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1177 *(int *) data = 100;
1178 else
1179 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1180 return (0);
1181
1182 /* the sparetable daemon calls this to wait for the kernel to
1183 * need a spare table. this ioctl does not return until a
1184 * spare table is needed. XXX -- calling mpsleep here in the
1185 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1186 * -- I should either compute the spare table in the kernel,
1187 * or have a different -- XXX XXX -- interface (a different
1188 * character device) for delivering the table -- XXX */
1189 #if 0
1190 case RAIDFRAME_SPARET_WAIT:
1191 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1192 while (!rf_sparet_wait_queue)
1193 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1194 waitreq = rf_sparet_wait_queue;
1195 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1196 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1197
1198 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1199
1200 RF_Free(waitreq, sizeof(*waitreq));
1201 return (0);
1202
1203
1204 /* wakes up a process waiting on SPARET_WAIT and puts an error
1205 * code in it that will cause the dameon to exit */
1206 case RAIDFRAME_ABORT_SPARET_WAIT:
1207 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1208 waitreq->fcol = -1;
1209 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1210 waitreq->next = rf_sparet_wait_queue;
1211 rf_sparet_wait_queue = waitreq;
1212 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1213 wakeup(&rf_sparet_wait_queue);
1214 return (0);
1215
1216 /* used by the spare table daemon to deliver a spare table
1217 * into the kernel */
1218 case RAIDFRAME_SEND_SPARET:
1219
1220 /* install the spare table */
1221 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1222
1223 /* respond to the requestor. the return status of the spare
1224 * table installation is passed in the "fcol" field */
1225 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1226 waitreq->fcol = retcode;
1227 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1228 waitreq->next = rf_sparet_resp_queue;
1229 rf_sparet_resp_queue = waitreq;
1230 wakeup(&rf_sparet_resp_queue);
1231 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1232
1233 return (retcode);
1234 #endif
1235
1236
1237 #endif /* RAIDFRAME_RECON > 0 */
1238
1239 default:
1240 break; /* fall through to the os-specific code below */
1241
1242 }
1243
1244 if (!raidPtrs[unit]->valid)
1245 return (EINVAL);
1246
1247 /*
1248 * Add support for "regular" device ioctls here.
1249 */
1250
1251 switch (cmd) {
1252 case DIOCGDINFO:
1253 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1254 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1255 break;
1256
1257 case DIOCGPART:
1258 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1259 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1260 ((struct partinfo *) data)->part =
1261 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1262 break;
1263
1264 case DIOCWDINFO:
1265 db1_printf(("DIOCWDINFO\n"));
1266 case DIOCSDINFO:
1267 db1_printf(("DIOCSDINFO\n"));
1268 if ((error = raidlock(rs)) != 0)
1269 return (error);
1270
1271 rs->sc_flags |= RAIDF_LABELLING;
1272
1273 error = setdisklabel(rs->sc_dkdev.dk_label,
1274 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1275 if (error == 0) {
1276 if (cmd == DIOCWDINFO)
1277 error = writedisklabel(RAIDLABELDEV(dev),
1278 raidstrategy, rs->sc_dkdev.dk_label,
1279 rs->sc_dkdev.dk_cpulabel);
1280 }
1281 rs->sc_flags &= ~RAIDF_LABELLING;
1282
1283 raidunlock(rs);
1284
1285 if (error)
1286 return (error);
1287 break;
1288
1289 case DIOCWLABEL:
1290 db1_printf(("DIOCWLABEL\n"));
1291 if (*(int *) data != 0)
1292 rs->sc_flags |= RAIDF_WLABEL;
1293 else
1294 rs->sc_flags &= ~RAIDF_WLABEL;
1295 break;
1296
1297 case DIOCGDEFLABEL:
1298 db1_printf(("DIOCGDEFLABEL\n"));
1299 raidgetdefaultlabel(raidPtrs[unit], rs,
1300 (struct disklabel *) data);
1301 break;
1302
1303 default:
1304 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1305 }
1306 return (retcode);
1307
1308 }
1309
1310
1311 /* raidinit -- complete the rest of the initialization for the
1312 RAIDframe device. */
1313
1314
/*
 * raidinit: finish kernel-side initialization of RAID unit `unit' after
 * RAIDframe itself has been configured.  Sets up the component-buffer
 * pool, attaches the disk(9) structure, and records the device size.
 * Returns 0 (retcode is never set to anything else here).
 */
static int
raidinit(dev, raidPtr, unit)
	dev_t dev;
	RF_Raid_t *raidPtr;
	int unit;
{
	int retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* pool of raidbuf headers used to clone bufs for component I/O
	 * (allocated via RAIDGETBUF, returned via RAIDPUTBUF) */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
1354
/*
 * This kernel thread never exits.  It is created once, and persists
 * until the system reboots.
 *
 * rf_ReconKernelThread: service loop for disk reconstruction.  Sleeps
 * until RAIDFRAME_FAIL_DISK queues a request on recon_queue, dequeues
 * it, and runs rf_FailDisk() to completion before looking for more work.
 */
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();		/* NB: never dropped -- thread stays at splbio */

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		/* empty queue: drop the lock and sleep until the ioctl
		 * path does a wakeup(&recon_queue) */
		while (!recon_queue) {
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			    "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the request copy was RF_Malloc'd by RAIDFRAME_FAIL_DISK */
		RF_Free(req, sizeof(*req));
	}
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* hand our request to the daemon (it sleeps on
	 * rf_sparet_wait_queue in RAIDFRAME_SPARET_WAIT) */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * Wait for the daemon's response to appear on
	 * rf_sparet_resp_queue (posted by RAIDFRAME_SEND_SPARET).
	 * NOTE(review): the old mpsleep (in the #if 0 below) dropped
	 * rf_sparet_wait_mutex while sleeping; tsleep() does not, so
	 * this appears to sleep holding the mutex -- confirm that the
	 * mutex is a no-op/simple lock on this platform.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* pop the response; the daemon passes its status in fcol */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * On a rejected request the buf is completed here (b_error set, B_ERROR,
 * biodone); otherwise the access is handed to rf_DoAccess asynchronously
 * and cbFunc/cbArg are invoked on completion.
 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* pb is 1 if the transfer ends on a partial sector; the extra
	 * (sum < ...) comparisons below catch arithmetic wrap-around */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	/* reject accesses that run past the end of the array */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* transfers must be a whole number of sectors */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Put a throttle on the number of requests we handle simultanously */

	RF_LOCK_MUTEX(raidPtr->mutex);

	/* sleep until an opening is freed (see the wakeup in the
	 * completion path) */
	while(raidPtr->openings <= 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	raidPtr->openings--;

	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE. always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	return (retcode);
}
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry
 *
 * Clones the original buf into a raidbuf from the per-unit pool, points
 * it at the component device/vnode for this queue, and fires it off via
 * VOP_STRATEGY.  KernelWakeupFunc() runs on completion and releases the
 * clone.  Always returns 0.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear stale error state left in the incoming buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* clone header from the pool; returned in KernelWakeupFunc */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately -- no physical I/O for a NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* aim the cloned buf at this queue's component device */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must account against the vnode before strategy */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs at biodone time for a component I/O dispatched by
   rf_DispatchKernelIO: propagates error/resid back to the original buf,
   marks the component failed on I/O error, returns the cloned raidbuf to
   the pool, and notifies the RAIDframe disk-queue layer.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the original buf this component I/O was cloned from */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate a component-level error to the original buf,
	 * defaulting to EIO if no specific errno was recorded */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account physical-I/O time in the trace record, if any */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the cloned buf header to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* notify the queue layer and the access's completion callback */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1758
1759
1760
1761 /*
1762 * initialize a buf structure for doing an I/O in the kernel.
1763 */
1764 static void
1765 InitBP(
1766 struct buf * bp,
1767 struct vnode * b_vp,
1768 unsigned rw_flag,
1769 dev_t dev,
1770 RF_SectorNum_t startSect,
1771 RF_SectorCount_t numSect,
1772 caddr_t buf,
1773 void (*cbFunc) (struct buf *),
1774 void *cbArg,
1775 int logBytesPerSector,
1776 struct proc * b_proc)
1777 {
1778 /* bp->b_flags = B_PHYS | rw_flag; */
1779 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1780 bp->b_bcount = numSect << logBytesPerSector;
1781 bp->b_bufsize = bp->b_bcount;
1782 bp->b_error = 0;
1783 bp->b_dev = dev;
1784 db1_printf(("bp->b_dev is %d\n", dev));
1785 bp->b_un.b_addr = buf;
1786 #if 0
1787 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1788 #endif
1789
1790 bp->b_blkno = startSect;
1791 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1792 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1793 if (bp->b_bcount == 0) {
1794 panic("bp->b_bcount is zero in InitBP!!\n");
1795 }
1796 bp->b_proc = b_proc;
1797 bp->b_iodone = cbFunc;
1798 bp->b_vp = b_vp;
1799
1800 }
1801 /* Extras... */
1802
#if 0
/*
 * NOTE(review): dead code -- an earlier stub of
 * rf_GetSpareTableFromDaemon(); the live implementation is defined
 * above.  Compiled out by #if 0 and a candidate for removal.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1815
1816 static void
1817 raidgetdefaultlabel(raidPtr, rs, lp)
1818 RF_Raid_t *raidPtr;
1819 struct raid_softc *rs;
1820 struct disklabel *lp;
1821 {
1822 db1_printf(("Building a default label...\n"));
1823 bzero(lp, sizeof(*lp));
1824
1825 /* fabricate a label... */
1826 lp->d_secperunit = raidPtr->totalSectors;
1827 lp->d_secsize = raidPtr->bytesPerSector;
1828 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1829 lp->d_ntracks = 1;
1830 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1831 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1832
1833 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1834 lp->d_type = DTYPE_RAID;
1835 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1836 lp->d_rpm = 3600;
1837 lp->d_interleave = 1;
1838 lp->d_flags = 0;
1839
1840 lp->d_partitions[RAW_PART].p_offset = 0;
1841 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1842 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1843 lp->d_npartitions = RAW_PART + 1;
1844
1845 lp->d_magic = DISKMAGIC;
1846 lp->d_magic2 = DISKMAGIC;
1847 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1848
1849 }
/*
 * Read the disklabel from the raid device. If one is not present, fake one
 * up.
 *
 * The cpu_disklabel is zeroed, a default label is installed, and then
 * readdisklabel() is given a chance to overwrite it with whatever is on
 * disk; a found label is sanity-checked against the configured raid size.
 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from the fabricated default; an on-disk label (if any)
	 * overrides it below */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1907 /*
1908 * Take care of things one might want to take care of in the event
1909 * that a disklabel isn't present.
1910 */
1911 static void
1912 raidmakedisklabel(rs)
1913 struct raid_softc *rs;
1914 {
1915 struct disklabel *lp = rs->sc_dkdev.dk_label;
1916 db1_printf(("Making a label..\n"));
1917
1918 /*
1919 * For historical reasons, if there's no disklabel present
1920 * the raw partition must be marked FS_BSDFFS.
1921 */
1922
1923 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1924
1925 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1926
1927 lp->d_checksum = dkcksum(lp);
1928 }
1929 /*
1930 * Lookup the provided name in the filesystem. If the file exists,
1931 * is a valid block device, and isn't being used by anyone else,
1932 * set *vpp to the file's vnode.
1933 * You'll find the original of this in ccd.c
1934 */
1935 int
1936 raidlookup(path, p, vpp)
1937 char *path;
1938 struct proc *p;
1939 struct vnode **vpp; /* result */
1940 {
1941 struct nameidata nd;
1942 struct vnode *vp;
1943 struct vattr va;
1944 int error;
1945
1946 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1947 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1948 #ifdef DEBUG
1949 printf("RAIDframe: vn_open returned %d\n", error);
1950 #endif
1951 return (error);
1952 }
1953 vp = nd.ni_vp;
1954 if (vp->v_usecount > 1) {
1955 VOP_UNLOCK(vp, 0);
1956 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1957 return (EBUSY);
1958 }
1959 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1960 VOP_UNLOCK(vp, 0);
1961 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1962 return (error);
1963 }
1964 /* XXX: eventually we should handle VREG, too. */
1965 if (va.va_type != VBLK) {
1966 VOP_UNLOCK(vp, 0);
1967 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1968 return (ENOTBLK);
1969 }
1970 VOP_UNLOCK(vp, 0);
1971 *vpp = vp;
1972 return (0);
1973 }
1974 /*
1975 * Wait interruptibly for an exclusive lock.
1976 *
1977 * XXX
1978 * Several drivers do this; it should be abstracted and made MP-safe.
1979 * (Hmm... where have we seen this warning before :-> GO )
1980 */
1981 static int
1982 raidlock(rs)
1983 struct raid_softc *rs;
1984 {
1985 int error;
1986
1987 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1988 rs->sc_flags |= RAIDF_WANTED;
1989 if ((error =
1990 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1991 return (error);
1992 }
1993 rs->sc_flags |= RAIDF_LOCKED;
1994 return (0);
1995 }
1996 /*
1997 * Unlock and wake up any waiters.
1998 */
1999 static void
2000 raidunlock(rs)
2001 struct raid_softc *rs;
2002 {
2003
2004 rs->sc_flags &= ~RAIDF_LOCKED;
2005 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2006 rs->sc_flags &= ~RAIDF_WANTED;
2007 wakeup(rs);
2008 }
2009 }
2010
2011
2012 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2013 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2014
2015 int
2016 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2017 {
2018 RF_ComponentLabel_t component_label;
2019 raidread_component_label(dev, b_vp, &component_label);
2020 component_label.mod_counter = mod_counter;
2021 component_label.clean = RF_RAID_CLEAN;
2022 raidwrite_component_label(dev, b_vp, &component_label);
2023 return(0);
2024 }
2025
2026
2027 int
2028 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2029 {
2030 RF_ComponentLabel_t component_label;
2031 raidread_component_label(dev, b_vp, &component_label);
2032 component_label.mod_counter = mod_counter;
2033 component_label.clean = RF_RAID_DIRTY;
2034 raidwrite_component_label(dev, b_vp, &component_label);
2035 return(0);
2036 }
2037
/* ARGSUSED */
/*
 * raidread_component_label: read the RF_ComponentLabel_t stored at
 * RF_COMPONENT_INFO_OFFSET on the component device `dev' into
 * *component_label, using a throwaway buffer and the component's raw
 * strategy routine.  Returns 0 on success or the biowait() errno.
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* hand the buf straight to the component's strategy routine */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* invalidate the scratch buffer so its contents aren't reused */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/* ARGSUSED */
/*
 * raidwrite_component_label: write *component_label (zero-padded to
 * RF_COMPONENT_INFO_SIZE) at RF_COMPONENT_INFO_OFFSET on the component
 * device `dev' via its raw strategy routine.  Returns 0 on success or
 * the biowait() errno.
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill, then drop the label at the front of the block */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* invalidate the scratch buffer so its contents aren't reused */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2122
2123 void
2124 rf_markalldirty( raidPtr )
2125 RF_Raid_t *raidPtr;
2126 {
2127 RF_ComponentLabel_t c_label;
2128 int r,c;
2129
2130 raidPtr->mod_counter++;
2131 for (r = 0; r < raidPtr->numRow; r++) {
2132 for (c = 0; c < raidPtr->numCol; c++) {
2133 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2134 raidread_component_label(
2135 raidPtr->Disks[r][c].dev,
2136 raidPtr->raid_cinfo[r][c].ci_vp,
2137 &c_label);
2138 if (c_label.status == rf_ds_spared) {
2139 /* XXX do something special...
2140 but whatever you do, don't
2141 try to access it!! */
2142 } else {
2143 #if 0
2144 c_label.status =
2145 raidPtr->Disks[r][c].status;
2146 raidwrite_component_label(
2147 raidPtr->Disks[r][c].dev,
2148 raidPtr->raid_cinfo[r][c].ci_vp,
2149 &c_label);
2150 #endif
2151 raidmarkdirty(
2152 raidPtr->Disks[r][c].dev,
2153 raidPtr->raid_cinfo[r][c].ci_vp,
2154 raidPtr->mod_counter);
2155 }
2156 }
2157 }
2158 }
2159 /* printf("Component labels marked dirty.\n"); */
2160 #if 0
2161 for( c = 0; c < raidPtr->numSpare ; c++) {
2162 sparecol = raidPtr->numCol + c;
2163 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2164 /*
2165
2166 XXX this is where we get fancy and map this spare
2167 into it's correct spot in the array.
2168
2169 */
2170 /*
2171
2172 we claim this disk is "optimal" if it's
2173 rf_ds_used_spare, as that means it should be
2174 directly substitutable for the disk it replaced.
2175 We note that too...
2176
2177 */
2178
2179 for(i=0;i<raidPtr->numRow;i++) {
2180 for(j=0;j<raidPtr->numCol;j++) {
2181 if ((raidPtr->Disks[i][j].spareRow ==
2182 r) &&
2183 (raidPtr->Disks[i][j].spareCol ==
2184 sparecol)) {
2185 srow = r;
2186 scol = sparecol;
2187 break;
2188 }
2189 }
2190 }
2191
2192 raidread_component_label(
2193 raidPtr->Disks[r][sparecol].dev,
2194 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2195 &c_label);
2196 /* make sure status is noted */
2197 c_label.version = RF_COMPONENT_LABEL_VERSION;
2198 c_label.mod_counter = raidPtr->mod_counter;
2199 c_label.serial_number = raidPtr->serial_number;
2200 c_label.row = srow;
2201 c_label.column = scol;
2202 c_label.num_rows = raidPtr->numRow;
2203 c_label.num_columns = raidPtr->numCol;
2204 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2205 c_label.status = rf_ds_optimal;
2206 raidwrite_component_label(
2207 raidPtr->Disks[r][sparecol].dev,
2208 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2209 &c_label);
2210 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2211 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2212 }
2213 }
2214
2215 #endif
2216 }
2217
2218
2219 void
2220 rf_update_component_labels( raidPtr )
2221 RF_Raid_t *raidPtr;
2222 {
2223 RF_ComponentLabel_t c_label;
2224 int sparecol;
2225 int r,c;
2226 int i,j;
2227 int srow, scol;
2228
2229 srow = -1;
2230 scol = -1;
2231
2232 /* XXX should do extra checks to make sure things really are clean,
2233 rather than blindly setting the clean bit... */
2234
2235 raidPtr->mod_counter++;
2236
2237 for (r = 0; r < raidPtr->numRow; r++) {
2238 for (c = 0; c < raidPtr->numCol; c++) {
2239 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2240 raidread_component_label(
2241 raidPtr->Disks[r][c].dev,
2242 raidPtr->raid_cinfo[r][c].ci_vp,
2243 &c_label);
2244 /* make sure status is noted */
2245 c_label.status = rf_ds_optimal;
2246 raidwrite_component_label(
2247 raidPtr->Disks[r][c].dev,
2248 raidPtr->raid_cinfo[r][c].ci_vp,
2249 &c_label);
2250 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2251 raidmarkclean(
2252 raidPtr->Disks[r][c].dev,
2253 raidPtr->raid_cinfo[r][c].ci_vp,
2254 raidPtr->mod_counter);
2255 }
2256 }
2257 /* else we don't touch it.. */
2258 #if 0
2259 else if (raidPtr->Disks[r][c].status !=
2260 rf_ds_failed) {
2261 raidread_component_label(
2262 raidPtr->Disks[r][c].dev,
2263 raidPtr->raid_cinfo[r][c].ci_vp,
2264 &c_label);
2265 /* make sure status is noted */
2266 c_label.status =
2267 raidPtr->Disks[r][c].status;
2268 raidwrite_component_label(
2269 raidPtr->Disks[r][c].dev,
2270 raidPtr->raid_cinfo[r][c].ci_vp,
2271 &c_label);
2272 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2273 raidmarkclean(
2274 raidPtr->Disks[r][c].dev,
2275 raidPtr->raid_cinfo[r][c].ci_vp,
2276 raidPtr->mod_counter);
2277 }
2278 }
2279 #endif
2280 }
2281 }
2282
2283 for( c = 0; c < raidPtr->numSpare ; c++) {
2284 sparecol = raidPtr->numCol + c;
2285 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2286 /*
2287
2288 we claim this disk is "optimal" if it's
2289 rf_ds_used_spare, as that means it should be
2290 directly substitutable for the disk it replaced.
2291 We note that too...
2292
2293 */
2294
2295 for(i=0;i<raidPtr->numRow;i++) {
2296 for(j=0;j<raidPtr->numCol;j++) {
2297 if ((raidPtr->Disks[i][j].spareRow ==
2298 0) &&
2299 (raidPtr->Disks[i][j].spareCol ==
2300 sparecol)) {
2301 srow = i;
2302 scol = j;
2303 break;
2304 }
2305 }
2306 }
2307
2308 raidread_component_label(
2309 raidPtr->Disks[0][sparecol].dev,
2310 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2311 &c_label);
2312 /* make sure status is noted */
2313 c_label.version = RF_COMPONENT_LABEL_VERSION;
2314 c_label.mod_counter = raidPtr->mod_counter;
2315 c_label.serial_number = raidPtr->serial_number;
2316 c_label.row = srow;
2317 c_label.column = scol;
2318 c_label.num_rows = raidPtr->numRow;
2319 c_label.num_columns = raidPtr->numCol;
2320 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2321 c_label.status = rf_ds_optimal;
2322 raidwrite_component_label(
2323 raidPtr->Disks[0][sparecol].dev,
2324 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2325 &c_label);
2326 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2327 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2328 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2329 raidPtr->mod_counter);
2330 }
2331 }
2332 }
2333 /* printf("Component labels updated\n"); */
2334 }
2335