/* $NetBSD: rf_netbsdkintf.c,v 1.14 1999/03/09 03:53:18 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/* Debug verbosity knob for the dbN_printf() macros below; 0 = quiet. */
154 int rf_kdebug_level = 0;
155 
/* States for rf_kbooted: has rf_BootRaidframe() run, and did it succeed? */
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160 
161 #ifdef DEBUG
/* dbN_printf((fmt, ...)): print when rf_kdebug_level exceeds N-1.
   Note the double-parenthesized call convention: `a' is a full arg list. */
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
/* Non-DEBUG kernels: only level-0 messages are ever emitted. */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177 
/* One RF_Raid_t per unit; the array is sized in raidattach(). */
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179 
/* Protects the two spare-table queues below. */
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181 
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186 
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190 
191 
/* Lock guarding recon_queue; taken via the macros below. */
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
/* I/O completion callback and buf initializer used by the disk queues. */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203 
/* DprintfN(fmt, args...): queue-debug tracing, gated on rf_queueDebug. */
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212 
/* Component-label clean/dirty bookkeeping helpers. */
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215 
216 void raid_shutdown(void *);
217 
/* Standard block/character driver entry points (cdevsw/bdevsw). */
218 void raidattach __P((int));
219 int raidsize __P((dev_t));
220 
221 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
222 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
223 static int raidinit __P((dev_t, RF_Raid_t *, int));
224 
225 int raidopen __P((dev_t, int, int, struct proc *));
226 int raidclose __P((dev_t, int, int, struct proc *));
227 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
228 int raidwrite __P((dev_t, struct uio *, int));
229 int raidread __P((dev_t, struct uio *, int));
230 void raidstrategy __P((struct buf *));
231 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
232 
/* On-disk component label I/O and in-core label maintenance. */
233 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
234 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
235 void rf_update_component_labels( RF_Raid_t *);
236 /*
237 * Pilfered from ccd.c
238 */
239 
/* Per-component I/O wrapper: one raidbuf per component transfer that is
 * part of a larger logical request.  rf_buf must stay first so a
 * (struct buf *) completion callback can be cast back to (struct raidbuf *). */
240 struct raidbuf {
241 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
242 struct buf *rf_obp; /* ptr. to original I/O buf */
243 int rf_flags; /* misc. flags */
244 RF_DiskQueueData_t *req;/* the request that this was part of.. */
245 };
246 
247 
/* Allocate/release a raidbuf from the unit's component buffer pool.
 * PR_NOWAIT: allocation may fail (returns NULL) rather than sleep. */
248 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
249 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
250
251 /* XXX Not sure if the following should be replacing the raidPtrs above,
252 or if it should be used in conjunction with that... */
253 
/* Per-unit driver state, parallel to raidPtrs[]; indexed by unit number. */
254 struct raid_softc {
255 int sc_flags; /* flags */
256 int sc_cflags; /* configuration flags */
257 size_t sc_size; /* size of the raid device */
258 dev_t sc_dev; /* our device.. */
259 void * sc_sdhook; /* our shutdown hook */
260 char sc_xname[20]; /* XXX external name */
261 struct disk sc_dkdev; /* generic disk device info */
262 struct pool sc_cbufpool; /* component buffer pool */
263 };
264 /* sc_flags */
265 #define RAIDF_INITED 0x01 /* unit has been initialized */
266 #define RAIDF_WLABEL 0x02 /* label area is writable */
267 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
268 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
269 #define RAIDF_LOCKED 0x80 /* unit is locked */
270 
/* Extract the unit number from a dev_t. */
271 #define raidunit(x) DISKUNIT(x)
/* Number of configured units; set once in raidattach(). */
272 static int numraid = 0;
273 
/* dev_t of the raw partition of a given raid device, for label I/O. */
274 #define RAIDLABELDEV(dev) \
275 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
276 
277 /* declared here, and made public, for the benefit of KVM stuff.. */
278 struct raid_softc *raid_softc;
279 
/* Disklabel fabrication/reading for the pseudo-disk. */
280 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
281 struct disklabel *));
282 static void raidgetdisklabel __P((dev_t));
283 static void raidmakedisklabel __P((struct raid_softc *));
284 
/* Per-unit sleep lock (RAIDF_LOCKED/RAIDF_WANTED protocol). */
285 static int raidlock __P((struct raid_softc *));
286 static void raidunlock __P((struct raid_softc *));
287 int raidlookup __P((char *, struct proc * p, struct vnode **));
288 
289 static void rf_markalldirty __P((RF_Raid_t *));
290
291 void
292 raidattach(num)
293 int num;
294 {
295 int raidID;
296 int i, rc;
297
298 #ifdef DEBUG
299 printf("raidattach: Asked for %d units\n", num);
300 #endif
301
302 if (num <= 0) {
303 #ifdef DIAGNOSTIC
304 panic("raidattach: count <= 0");
305 #endif
306 return;
307 }
308 /* This is where all the initialization stuff gets done. */
309
310 /* Make some space for requested number of units... */
311
312 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
313 if (raidPtrs == NULL) {
314 panic("raidPtrs is NULL!!\n");
315 }
316
317 rc = rf_mutex_init(&rf_sparet_wait_mutex);
318 if (rc) {
319 RF_PANIC();
320 }
321
322 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
323 recon_queue = NULL;
324
325 for (i = 0; i < numraid; i++)
326 raidPtrs[i] = NULL;
327 rc = rf_BootRaidframe();
328 if (rc == 0)
329 printf("Kernelized RAIDframe activated\n");
330 else
331 panic("Serious error booting RAID!!\n");
332
333 rf_kbooted = RFK_BOOT_GOOD;
334
335 /* put together some datastructures like the CCD device does.. This
336 * lets us lock the device and what-not when it gets opened. */
337
338 raid_softc = (struct raid_softc *)
339 malloc(num * sizeof(struct raid_softc),
340 M_RAIDFRAME, M_NOWAIT);
341 if (raid_softc == NULL) {
342 printf("WARNING: no memory for RAIDframe driver\n");
343 return;
344 }
345 numraid = num;
346 bzero(raid_softc, num * sizeof(struct raid_softc));
347
348 for (raidID = 0; raidID < num; raidID++) {
349 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
350 (RF_Raid_t *));
351 if (raidPtrs[raidID] == NULL) {
352 printf("raidPtrs[%d] is NULL\n", raidID);
353 }
354 }
355 }
356
357
358 int
359 raidsize(dev)
360 dev_t dev;
361 {
362 struct raid_softc *rs;
363 struct disklabel *lp;
364 int part, unit, omask, size;
365
366 unit = raidunit(dev);
367 if (unit >= numraid)
368 return (-1);
369 rs = &raid_softc[unit];
370
371 if ((rs->sc_flags & RAIDF_INITED) == 0)
372 return (-1);
373
374 part = DISKPART(dev);
375 omask = rs->sc_dkdev.dk_openmask & (1 << part);
376 lp = rs->sc_dkdev.dk_label;
377
378 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
379 return (-1);
380
381 if (lp->d_partitions[part].p_fstype != FS_SWAP)
382 size = -1;
383 else
384 size = lp->d_partitions[part].p_size *
385 (lp->d_secsize / DEV_BSIZE);
386
387 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
388 return (-1);
389
390 return (size);
391
392 }
393
394 int
395 raiddump(dev, blkno, va, size)
396 dev_t dev;
397 daddr_t blkno;
398 caddr_t va;
399 size_t size;
400 {
401 /* Not implemented. */
402 return ENXIO;
403 }
404 /* ARGSUSED */
/*
 * raidopen -- open entry point for the raid block/character device.
 *
 * Validates the unit and partition, reads the disklabel on first open,
 * records the open in the char/block open masks (which prevents
 * unconfiguration while open), and marks all components dirty on the
 * first open of a configured unit.  Holds the per-unit lock for the
 * duration.  Returns 0 or an errno (ENXIO for bad unit/partition).
 */
405 int
406 raidopen(dev, flags, fmt, p)
407 dev_t dev;
408 int flags, fmt;
409 struct proc *p;
410 {
411 int unit = raidunit(dev);
412 struct raid_softc *rs;
413 struct disklabel *lp;
414 int part, pmask;
415 int error = 0;
416 
417 if (unit >= numraid)
418 return (ENXIO);
419 rs = &raid_softc[unit];
420 
/* Serialize against concurrent open/close/unconfigure. */
421 if ((error = raidlock(rs)) != 0)
422 return (error);
423 lp = rs->sc_dkdev.dk_label;
424 
425 part = DISKPART(dev);
426 pmask = (1 << part);
427 
428 db1_printf(("Opening raid device number: %d partition: %d\n",
429 unit, part));
430 
431 
/* First open of a configured unit: (re)read the on-disk label. */
432 if ((rs->sc_flags & RAIDF_INITED) &&
433 (rs->sc_dkdev.dk_openmask == 0))
434 raidgetdisklabel(dev);
435 
436 /* make sure that this partition exists */
437 
/* The raw partition is always openable; others must be in the label. */
438 if (part != RAW_PART) {
439 db1_printf(("Not a raw partition..\n"));
440 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
441 ((part >= lp->d_npartitions) ||
442 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
443 error = ENXIO;
444 raidunlock(rs);
445 db1_printf(("Bailing out...\n"));
446 return (error);
447 }
448 }
449 /* Prevent this unit from being unconfigured while open. */
450 switch (fmt) {
451 case S_IFCHR:
452 rs->sc_dkdev.dk_copenmask |= pmask;
453 break;
454 
455 case S_IFBLK:
456 rs->sc_dkdev.dk_bopenmask |= pmask;
457 break;
458 }
459 
/* dk_openmask still reflects the pre-open state here, so this fires
 * only on the very first open of the unit. */
460 if ((rs->sc_dkdev.dk_openmask == 0) &&
461 ((rs->sc_flags & RAIDF_INITED) != 0)) {
462 /* First one... mark things as dirty... Note that we *MUST*
463 have done a configure before this. I DO NOT WANT TO BE
464 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
465 THAT THEY BELONG TOGETHER!!!!! */
466 /* XXX should check to see if we're only open for reading
467 here... If so, we needn't do this, but then need some
468 other way of keeping track of what's happened.. */
469 
470 rf_markalldirty( raidPtrs[unit] );
471 }
472 
473 
474 rs->sc_dkdev.dk_openmask =
475 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
476 
477 raidunlock(rs);
478 
479 return (error);
480 
481 
482 }
483 /* ARGSUSED */
/*
 * raidclose -- close entry point for the raid block/character device.
 *
 * Clears this partition's bit from the appropriate open mask and, when
 * the last partition of a configured unit closes, pushes updated
 * component labels out to disk.  Holds the per-unit lock throughout.
 * Returns 0, or an errno if the lock could not be taken or the unit
 * number is out of range.
 */
484 int
485 raidclose(dev, flags, fmt, p)
486 dev_t dev;
487 int flags, fmt;
488 struct proc *p;
489 {
490 int unit = raidunit(dev);
491 struct raid_softc *rs;
492 int error = 0;
493 int part;
494 
495 if (unit >= numraid)
496 return (ENXIO);
497 rs = &raid_softc[unit];
498 
/* Serialize against concurrent open/close/unconfigure. */
499 if ((error = raidlock(rs)) != 0)
500 return (error);
501 
502 part = DISKPART(dev);
503 
504 /* ...that much closer to allowing unconfiguration... */
505 switch (fmt) {
506 case S_IFCHR:
507 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
508 break;
509 
510 case S_IFBLK:
511 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
512 break;
513 }
514 rs->sc_dkdev.dk_openmask =
515 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
516 
/* Last close of a still-configured unit: write out component labels. */
517 if ((rs->sc_dkdev.dk_openmask == 0) &&
518 ((rs->sc_flags & RAIDF_INITED) != 0)) {
519 /* Last one... device is not unconfigured yet.
520 Device shutdown has taken care of setting the
521 clean bits if RAIDF_INITED is not set
522 mark things as clean... */
523 rf_update_component_labels( raidPtrs[unit] );
524 }
525 
526 raidunlock(rs);
527 return (0);
528 
529 }
530
531 void
532 raidstrategy(bp)
533 register struct buf *bp;
534 {
535 register int s;
536
537 unsigned int raidID = raidunit(bp->b_dev);
538 RF_Raid_t *raidPtr;
539 struct raid_softc *rs = &raid_softc[raidID];
540 struct disklabel *lp;
541 int wlabel;
542
543 #if 0
544 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
545 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
546 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
547 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
548 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
549
550 if (bp->b_flags & B_READ)
551 db1_printf(("READ\n"));
552 else
553 db1_printf(("WRITE\n"));
554 #endif
555 if (rf_kbooted != RFK_BOOT_GOOD)
556 return;
557 if (raidID >= numraid || !raidPtrs[raidID]) {
558 bp->b_error = ENODEV;
559 bp->b_flags |= B_ERROR;
560 bp->b_resid = bp->b_bcount;
561 biodone(bp);
562 return;
563 }
564 raidPtr = raidPtrs[raidID];
565 if (!raidPtr->valid) {
566 bp->b_error = ENODEV;
567 bp->b_flags |= B_ERROR;
568 bp->b_resid = bp->b_bcount;
569 biodone(bp);
570 return;
571 }
572 if (bp->b_bcount == 0) {
573 db1_printf(("b_bcount is zero..\n"));
574 biodone(bp);
575 return;
576 }
577 lp = rs->sc_dkdev.dk_label;
578
579 /*
580 * Do bounds checking and adjust transfer. If there's an
581 * error, the bounds check will flag that for us.
582 */
583
584 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
585 if (DISKPART(bp->b_dev) != RAW_PART)
586 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
587 db1_printf(("Bounds check failed!!:%d %d\n",
588 (int) bp->b_blkno, (int) wlabel));
589 biodone(bp);
590 return;
591 }
592 s = splbio(); /* XXX Needed? */
593 db1_printf(("Beginning strategy...\n"));
594
595 bp->b_resid = 0;
596 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
597 NULL, NULL, NULL);
598 if (bp->b_error) {
599 bp->b_flags |= B_ERROR;
600 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
601 bp->b_error));
602 }
603 splx(s);
604 #if 0
605 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
606 bp, bp->b_data,
607 (int) bp->b_bcount, (int) bp->b_resid));
608 #endif
609 }
610 /* ARGSUSED */
611 int
612 raidread(dev, uio, flags)
613 dev_t dev;
614 struct uio *uio;
615 int flags;
616 {
617 int unit = raidunit(dev);
618 struct raid_softc *rs;
619 int part;
620
621 if (unit >= numraid)
622 return (ENXIO);
623 rs = &raid_softc[unit];
624
625 if ((rs->sc_flags & RAIDF_INITED) == 0)
626 return (ENXIO);
627 part = DISKPART(dev);
628
629 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
630
631 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
632
633 }
634 /* ARGSUSED */
635 int
636 raidwrite(dev, uio, flags)
637 dev_t dev;
638 struct uio *uio;
639 int flags;
640 {
641 int unit = raidunit(dev);
642 struct raid_softc *rs;
643
644 if (unit >= numraid)
645 return (ENXIO);
646 rs = &raid_softc[unit];
647
648 if ((rs->sc_flags & RAIDF_INITED) == 0)
649 return (ENXIO);
650 db1_printf(("raidwrite\n"));
651 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
652
653 }
654
655 int
656 raidioctl(dev, cmd, data, flag, p)
657 dev_t dev;
658 u_long cmd;
659 caddr_t data;
660 int flag;
661 struct proc *p;
662 {
663 int unit = raidunit(dev);
664 int error = 0;
665 int part, pmask;
666 struct raid_softc *rs;
667 #if 0
668 int r, c;
669 #endif
670 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
671
672 /* struct ccdbuf *cbp; */
673 /* struct raidbuf *raidbp; */
674 RF_Config_t *k_cfg, *u_cfg;
675 u_char *specific_buf;
676 int retcode = 0;
677 int row;
678 int column;
679 struct rf_recon_req *rrcopy, *rr;
680 RF_ComponentLabel_t *component_label;
681 RF_ComponentLabel_t ci_label;
682 RF_ComponentLabel_t **c_label_ptr;
683 RF_SingleComponent_t *sparePtr,*componentPtr;
684 RF_SingleComponent_t hot_spare;
685 RF_SingleComponent_t component;
686
687 if (unit >= numraid)
688 return (ENXIO);
689 rs = &raid_softc[unit];
690
691 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
692 (int) DISKPART(dev), (int) unit, (int) cmd));
693
694 /* Must be open for writes for these commands... */
695 switch (cmd) {
696 case DIOCSDINFO:
697 case DIOCWDINFO:
698 case DIOCWLABEL:
699 if ((flag & FWRITE) == 0)
700 return (EBADF);
701 }
702
703 /* Must be initialized for these... */
704 switch (cmd) {
705 case DIOCGDINFO:
706 case DIOCSDINFO:
707 case DIOCWDINFO:
708 case DIOCGPART:
709 case DIOCWLABEL:
710 case DIOCGDEFLABEL:
711 case RAIDFRAME_SHUTDOWN:
712 case RAIDFRAME_REWRITEPARITY:
713 case RAIDFRAME_GET_INFO:
714 case RAIDFRAME_RESET_ACCTOTALS:
715 case RAIDFRAME_GET_ACCTOTALS:
716 case RAIDFRAME_KEEP_ACCTOTALS:
717 case RAIDFRAME_GET_SIZE:
718 case RAIDFRAME_FAIL_DISK:
719 case RAIDFRAME_COPYBACK:
720 case RAIDFRAME_CHECKRECON:
721 case RAIDFRAME_GET_COMPONENT_LABEL:
722 case RAIDFRAME_SET_COMPONENT_LABEL:
723 case RAIDFRAME_ADD_HOT_SPARE:
724 case RAIDFRAME_REMOVE_HOT_SPARE:
725 case RAIDFRAME_INIT_LABELS:
726 case RAIDFRAME_REBUILD_IN_PLACE:
727 if ((rs->sc_flags & RAIDF_INITED) == 0)
728 return (ENXIO);
729 }
730
731 switch (cmd) {
732
733
734 /* configure the system */
735 case RAIDFRAME_CONFIGURE:
736
737 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
738 /* copy-in the configuration information */
739 /* data points to a pointer to the configuration structure */
740 u_cfg = *((RF_Config_t **) data);
741 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
742 if (k_cfg == NULL) {
743 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
744 return (ENOMEM);
745 }
746 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
747 sizeof(RF_Config_t));
748 if (retcode) {
749 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
750 retcode));
751 return (retcode);
752 }
753 /* allocate a buffer for the layout-specific data, and copy it
754 * in */
755 if (k_cfg->layoutSpecificSize) {
756 if (k_cfg->layoutSpecificSize > 10000) {
757 /* sanity check */
758 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
759 return (EINVAL);
760 }
761 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
762 (u_char *));
763 if (specific_buf == NULL) {
764 RF_Free(k_cfg, sizeof(RF_Config_t));
765 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
766 return (ENOMEM);
767 }
768 retcode = copyin(k_cfg->layoutSpecific,
769 (caddr_t) specific_buf,
770 k_cfg->layoutSpecificSize);
771 if (retcode) {
772 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
773 retcode));
774 return (retcode);
775 }
776 } else
777 specific_buf = NULL;
778 k_cfg->layoutSpecific = specific_buf;
779
780 /* should do some kind of sanity check on the configuration.
781 * Store the sum of all the bytes in the last byte? */
782
783 #if 0
784 db1_printf(("Considering configuring the system.:%d 0x%x\n",
785 unit, p));
786 #endif
787
788 /* We need the pointer to this a little deeper, so stash it
789 * here... */
790
791 raidPtrs[unit]->proc = p;
792
793 /* configure the system */
794
795 raidPtrs[unit]->raidid = unit;
796 retcode = rf_Configure(raidPtrs[unit], k_cfg);
797
798
799 if (retcode == 0) {
800 retcode = raidinit(dev, raidPtrs[unit], unit);
801 rf_markalldirty( raidPtrs[unit] );
802 #if 0
803 /* register our shutdown hook */
804 if ((rs->sc_sdhook =
805 shutdownhook_establish(raid_shutdown,
806 raidPtrs[unit])) == NULL) {
807 printf("raid%d: WARNING: unable to establish shutdown hook\n",raidPtrs[unit]->raidid);
808 }
809 #endif
810
811 }
812 /* free the buffers. No return code here. */
813 if (k_cfg->layoutSpecificSize) {
814 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
815 }
816 RF_Free(k_cfg, sizeof(RF_Config_t));
817
818 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
819 retcode));
820
821 return (retcode);
822
823 /* shutdown the system */
824 case RAIDFRAME_SHUTDOWN:
825
826 if ((error = raidlock(rs)) != 0)
827 return (error);
828
829 /*
830 * If somebody has a partition mounted, we shouldn't
831 * shutdown.
832 */
833
834 part = DISKPART(dev);
835 pmask = (1 << part);
836 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
837 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
838 (rs->sc_dkdev.dk_copenmask & pmask))) {
839 raidunlock(rs);
840 return (EBUSY);
841 }
842
843 if (rf_debugKernelAccess) {
844 printf("call shutdown\n");
845 }
846 raidPtrs[unit]->proc = p; /* XXX necessary evil */
847
848 retcode = rf_Shutdown(raidPtrs[unit]);
849
850 db1_printf(("Done main shutdown\n"));
851
852 pool_destroy(&rs->sc_cbufpool);
853 db1_printf(("Done freeing component buffer freelist\n"));
854
855 /* It's no longer initialized... */
856 rs->sc_flags &= ~RAIDF_INITED;
857 #if 0
858 shutdownhook_disestablish( rs->sc_sdhook );
859 rs->sc_sdhook = NULL;
860 #endif
861 /* Detach the disk. */
862 disk_detach(&rs->sc_dkdev);
863
864 raidunlock(rs);
865
866 return (retcode);
867 case RAIDFRAME_GET_COMPONENT_LABEL:
868 c_label_ptr = (RF_ComponentLabel_t **) data;
869 /* need to read the component label for the disk indicated
870 by row,column in component_label
871 XXX need to sanity check these values!!!
872 */
873
874 /* For practice, let's get it directly fromdisk, rather
875 than from the in-core copy */
876 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
877 (RF_ComponentLabel_t *));
878 if (component_label == NULL)
879 return (ENOMEM);
880
881 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
882
883 retcode = copyin( *c_label_ptr, component_label,
884 sizeof(RF_ComponentLabel_t));
885
886 if (retcode) {
887 return(retcode);
888 }
889
890 row = component_label->row;
891 printf("Row: %d\n",row);
892 if (row > raidPtrs[unit]->numRow) {
893 row = 0; /* XXX */
894 }
895 column = component_label->column;
896 printf("Column: %d\n",column);
897 if (column > raidPtrs[unit]->numCol) {
898 column = 0; /* XXX */
899 }
900
901 raidread_component_label(
902 raidPtrs[unit]->Disks[row][column].dev,
903 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
904 component_label );
905
906 retcode = copyout((caddr_t) component_label,
907 (caddr_t) *c_label_ptr,
908 sizeof(RF_ComponentLabel_t));
909 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
910 return (retcode);
911
912 case RAIDFRAME_SET_COMPONENT_LABEL:
913 component_label = (RF_ComponentLabel_t *) data;
914
915 /* XXX check the label for valid stuff... */
916 /* Note that some things *should not* get modified --
917 the user should be re-initing the labels instead of
918 trying to patch things.
919 */
920
921 printf("Got component label:\n");
922 printf("Version: %d\n",component_label->version);
923 printf("Serial Number: %d\n",component_label->serial_number);
924 printf("Mod counter: %d\n",component_label->mod_counter);
925 printf("Row: %d\n", component_label->row);
926 printf("Column: %d\n", component_label->column);
927 printf("Num Rows: %d\n", component_label->num_rows);
928 printf("Num Columns: %d\n", component_label->num_columns);
929 printf("Clean: %d\n", component_label->clean);
930 printf("Status: %d\n", component_label->status);
931
932 row = component_label->row;
933 column = component_label->column;
934
935 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
936 (column < 0) || (column > raidPtrs[unit]->numCol)) {
937 return(EINVAL);
938 }
939
940 /* XXX this isn't allowed to do anything for now :-) */
941 #if 0
942 raidwrite_component_label(
943 raidPtrs[unit]->Disks[row][column].dev,
944 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
945 component_label );
946 #endif
947 return (0);
948
949 case RAIDFRAME_INIT_LABELS:
950 component_label = (RF_ComponentLabel_t *) data;
951 /*
952 we only want the serial number from
953 the above. We get all the rest of the information
954 from the config that was used to create this RAID
955 set.
956 */
957
958 raidPtrs[unit]->serial_number = component_label->serial_number;
959 /* current version number */
960 ci_label.version = RF_COMPONENT_LABEL_VERSION;
961 ci_label.serial_number = component_label->serial_number;
962 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
963 ci_label.num_rows = raidPtrs[unit]->numRow;
964 ci_label.num_columns = raidPtrs[unit]->numCol;
965 ci_label.clean = RF_RAID_DIRTY; /* not clean */
966 ci_label.status = rf_ds_optimal; /* "It's good!" */
967
968 for(row=0;row<raidPtrs[unit]->numRow;row++) {
969 ci_label.row = row;
970 for(column=0;column<raidPtrs[unit]->numCol;column++) {
971 ci_label.column = column;
972 raidwrite_component_label(
973 raidPtrs[unit]->Disks[row][column].dev,
974 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
975 &ci_label );
976 }
977 }
978
979 return (retcode);
980
981 /* initialize all parity */
982 case RAIDFRAME_REWRITEPARITY:
983
984 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0)
985 return (EINVAL);
986 /* borrow the thread of the requesting process */
987 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
988 retcode = rf_RewriteParity(raidPtrs[unit]);
989 /* return I/O Error if the parity rewrite fails */
990
991 if (retcode) {
992 retcode = EIO;
993 } else {
994 /* set the clean bit! If we shutdown correctly,
995 the clean bit on each component label will get
996 set */
997 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
998 }
999 return (retcode);
1000
1001
1002 case RAIDFRAME_ADD_HOT_SPARE:
1003 sparePtr = (RF_SingleComponent_t *) data;
1004 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1005 printf("Adding spare\n");
1006 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1007 return(retcode);
1008
1009 case RAIDFRAME_REMOVE_HOT_SPARE:
1010 return(retcode);
1011
1012 case RAIDFRAME_REBUILD_IN_PLACE:
1013 componentPtr = (RF_SingleComponent_t *) data;
1014 memcpy( &component, componentPtr,
1015 sizeof(RF_SingleComponent_t));
1016 row = component.row;
1017 column = component.column;
1018 printf("Rebuild: %d %d\n",row, column);
1019 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1020 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1021 return(EINVAL);
1022 }
1023 printf("Attempting a rebuild in place\n");
1024 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1025 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1026 return(retcode);
1027
1028 /* issue a test-unit-ready through raidframe to the indicated
1029 * device */
1030 #if 0 /* XXX not supported yet (ever?) */
1031 case RAIDFRAME_TUR:
1032 /* debug only */
1033 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1034 return (retcode);
1035 #endif
1036 case RAIDFRAME_GET_INFO:
1037 {
1038 RF_Raid_t *raid = raidPtrs[unit];
1039 RF_DeviceConfig_t *cfg, **ucfgp;
1040 int i, j, d;
1041
1042 if (!raid->valid)
1043 return (ENODEV);
1044 ucfgp = (RF_DeviceConfig_t **) data;
1045 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1046 (RF_DeviceConfig_t *));
1047 if (cfg == NULL)
1048 return (ENOMEM);
1049 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1050 cfg->rows = raid->numRow;
1051 cfg->cols = raid->numCol;
1052 cfg->ndevs = raid->numRow * raid->numCol;
1053 if (cfg->ndevs >= RF_MAX_DISKS) {
1054 cfg->ndevs = 0;
1055 return (ENOMEM);
1056 }
1057 cfg->nspares = raid->numSpare;
1058 if (cfg->nspares >= RF_MAX_DISKS) {
1059 cfg->nspares = 0;
1060 return (ENOMEM);
1061 }
1062 cfg->maxqdepth = raid->maxQueueDepth;
1063 d = 0;
1064 for (i = 0; i < cfg->rows; i++) {
1065 for (j = 0; j < cfg->cols; j++) {
1066 cfg->devs[d] = raid->Disks[i][j];
1067 d++;
1068 }
1069 }
1070 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1071 cfg->spares[i] = raid->Disks[0][j];
1072 }
1073 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1074 sizeof(RF_DeviceConfig_t));
1075 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1076
1077 return (retcode);
1078 }
1079 break;
1080
1081 case RAIDFRAME_RESET_ACCTOTALS:
1082 {
1083 RF_Raid_t *raid = raidPtrs[unit];
1084
1085 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1086 return (0);
1087 }
1088 break;
1089
1090 case RAIDFRAME_GET_ACCTOTALS:
1091 {
1092 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1093 RF_Raid_t *raid = raidPtrs[unit];
1094
1095 *totals = raid->acc_totals;
1096 return (0);
1097 }
1098 break;
1099
1100 case RAIDFRAME_KEEP_ACCTOTALS:
1101 {
1102 RF_Raid_t *raid = raidPtrs[unit];
1103 int *keep = (int *) data;
1104
1105 raid->keep_acc_totals = *keep;
1106 return (0);
1107 }
1108 break;
1109
1110 case RAIDFRAME_GET_SIZE:
1111 *(int *) data = raidPtrs[unit]->totalSectors;
1112 return (0);
1113
1114 #define RAIDFRAME_RECON 1
1115 /* XXX The above should probably be set somewhere else!! GO */
1116 #if RAIDFRAME_RECON > 0
1117
1118 /* fail a disk & optionally start reconstruction */
1119 case RAIDFRAME_FAIL_DISK:
1120 rr = (struct rf_recon_req *) data;
1121
1122 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1123 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1124 return (EINVAL);
1125
1126 printf("raid%d: Failing the disk: row: %d col: %d\n",
1127 unit, rr->row, rr->col);
1128
1129 /* make a copy of the recon request so that we don't rely on
1130 * the user's buffer */
1131 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1132 bcopy(rr, rrcopy, sizeof(*rr));
1133 rrcopy->raidPtr = (void *) raidPtrs[unit];
1134
1135 LOCK_RECON_Q_MUTEX();
1136 rrcopy->next = recon_queue;
1137 recon_queue = rrcopy;
1138 wakeup(&recon_queue);
1139 UNLOCK_RECON_Q_MUTEX();
1140
1141 return (0);
1142
1143 /* invoke a copyback operation after recon on whatever disk
1144 * needs it, if any */
1145 case RAIDFRAME_COPYBACK:
1146 /* borrow the current thread to get this done */
1147 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1148 rf_CopybackReconstructedData(raidPtrs[unit]);
1149 return (0);
1150
1151 /* return the percentage completion of reconstruction */
1152 case RAIDFRAME_CHECKRECON:
1153 row = *(int *) data;
1154 if (row < 0 || row >= raidPtrs[unit]->numRow)
1155 return (EINVAL);
1156 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1157 *(int *) data = 100;
1158 else
1159 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1160 return (0);
1161
1162 /* the sparetable daemon calls this to wait for the kernel to
1163 * need a spare table. this ioctl does not return until a
1164 * spare table is needed. XXX -- calling mpsleep here in the
1165 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1166 * -- I should either compute the spare table in the kernel,
1167 * or have a different -- XXX XXX -- interface (a different
1168 * character device) for delivering the table -- XXX */
1169 #if 0
1170 case RAIDFRAME_SPARET_WAIT:
1171 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1172 while (!rf_sparet_wait_queue)
1173 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1174 waitreq = rf_sparet_wait_queue;
1175 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1176 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1177
1178 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1179
1180 RF_Free(waitreq, sizeof(*waitreq));
1181 return (0);
1182
1183
1184 /* wakes up a process waiting on SPARET_WAIT and puts an error
 * code in it that will cause the daemon to exit */
1186 case RAIDFRAME_ABORT_SPARET_WAIT:
1187 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1188 waitreq->fcol = -1;
1189 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1190 waitreq->next = rf_sparet_wait_queue;
1191 rf_sparet_wait_queue = waitreq;
1192 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1193 wakeup(&rf_sparet_wait_queue);
1194 return (0);
1195
1196 /* used by the spare table daemon to deliver a spare table
1197 * into the kernel */
1198 case RAIDFRAME_SEND_SPARET:
1199
1200 /* install the spare table */
1201 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1202
1203 /* respond to the requestor. the return status of the spare
1204 * table installation is passed in the "fcol" field */
1205 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1206 waitreq->fcol = retcode;
1207 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1208 waitreq->next = rf_sparet_resp_queue;
1209 rf_sparet_resp_queue = waitreq;
1210 wakeup(&rf_sparet_resp_queue);
1211 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1212
1213 return (retcode);
1214 #endif
1215
1216
1217 #endif /* RAIDFRAME_RECON > 0 */
1218
1219 default:
1220 break; /* fall through to the os-specific code below */
1221
1222 }
1223
1224 if (!raidPtrs[unit]->valid)
1225 return (EINVAL);
1226
1227 /*
1228 * Add support for "regular" device ioctls here.
1229 */
1230
1231 switch (cmd) {
1232 case DIOCGDINFO:
1233 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1234 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1235 break;
1236
1237 case DIOCGPART:
1238 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1239 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1240 ((struct partinfo *) data)->part =
1241 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1242 break;
1243
1244 case DIOCWDINFO:
1245 db1_printf(("DIOCWDINFO\n"));
1246 case DIOCSDINFO:
1247 db1_printf(("DIOCSDINFO\n"));
1248 if ((error = raidlock(rs)) != 0)
1249 return (error);
1250
1251 rs->sc_flags |= RAIDF_LABELLING;
1252
1253 error = setdisklabel(rs->sc_dkdev.dk_label,
1254 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1255 if (error == 0) {
1256 if (cmd == DIOCWDINFO)
1257 error = writedisklabel(RAIDLABELDEV(dev),
1258 raidstrategy, rs->sc_dkdev.dk_label,
1259 rs->sc_dkdev.dk_cpulabel);
1260 }
1261 rs->sc_flags &= ~RAIDF_LABELLING;
1262
1263 raidunlock(rs);
1264
1265 if (error)
1266 return (error);
1267 break;
1268
1269 case DIOCWLABEL:
1270 db1_printf(("DIOCWLABEL\n"));
1271 if (*(int *) data != 0)
1272 rs->sc_flags |= RAIDF_WLABEL;
1273 else
1274 rs->sc_flags &= ~RAIDF_WLABEL;
1275 break;
1276
1277 case DIOCGDEFLABEL:
1278 db1_printf(("DIOCGDEFLABEL\n"));
1279 raidgetdefaultlabel(raidPtrs[unit], rs,
1280 (struct disklabel *) data);
1281 break;
1282
1283 default:
1284 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1285 }
1286 return (retcode);
1287
1288 }
1289
1290
1291 /* raidinit -- complete the rest of the initialization for the
1292 RAIDframe device. */
1293
1294
1295 static int
1296 raidinit(dev, raidPtr, unit)
1297 dev_t dev;
1298 RF_Raid_t *raidPtr;
1299 int unit;
1300 {
1301 int retcode;
1302 /* int ix; */
1303 /* struct raidbuf *raidbp; */
1304 struct raid_softc *rs;
1305
1306 retcode = 0;
1307
1308 rs = &raid_softc[unit];
1309 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1310 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1311
1312
1313 /* XXX should check return code first... */
1314 rs->sc_flags |= RAIDF_INITED;
1315
1316 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1317
1318 rs->sc_dkdev.dk_name = rs->sc_xname;
1319
1320 /* disk_attach actually creates space for the CPU disklabel, among
1321 * other things, so it's critical to call this *BEFORE* we try putzing
1322 * with disklabels. */
1323
1324 disk_attach(&rs->sc_dkdev);
1325
1326 /* XXX There may be a weird interaction here between this, and
1327 * protectedSectors, as used in RAIDframe. */
1328
1329 rs->sc_size = raidPtr->totalSectors;
1330 rs->sc_dev = dev;
1331
1332 return (retcode);
1333 }
1334
1335 void
1336 raid_shutdown(arg)
1337 void *arg;
1338 {
1339 RF_Raid_t *raidPtr = arg;
1340 struct raid_softc *rs;
1341
1342 /* This is called by out shutdown hook.
1343 The lights are being turned out, so lets shutdown as
1344 gracefully as possible */
1345
1346 rs = &raid_softc[raidPtr->raidid];
1347
1348 printf("raid%d: shutdown hooks called\n",raidPtr->raidid);
1349 rf_Shutdown(raidPtr);
1350
1351 /* It's no longer initialized... */
1352 rs->sc_flags &= ~RAIDF_INITED;
1353
1354
1355 }
1356
1357 /*
1358 * This kernel thread never exits. It is created once, and persists
1359 * until the system reboots.
1360 */
1361
/*
 * rf_ReconKernelThread: service loop for reconstruction requests.
 * Sleeps until a request is queued (by the RAIDFRAME_FAIL_DISK ioctl),
 * dequeues it, and runs the (possibly very long) fail/reconstruct
 * operation.  Never returns.
 */
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	/* NOTE(review): s is never passed to splx() -- harmless only
	 * because this thread never exits. */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		/*
		 * Wait for work: the queue mutex is dropped around
		 * tsleep() and re-taken before the queue is re-tested,
		 * so the dequeue below always runs with the mutex held.
		 */
		while (!recon_queue) {
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO | PCATCH,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* The request was copied for us by the ioctl handler;
		 * we own it and must free it. */
		RF_Free(req, sizeof(*req));
	}
}
1396 /* wake up the daemon & tell it to get us a spare table
1397 * XXX
1398 * the entries in the queues should be tagged with the raidPtr
1399 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1401 * XXX
1402 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* Queue our request and wake the user-land sparetable daemon,
	 * which is blocked in RAIDFRAME_SPARET_WAIT. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/*
	 * NOTE(review): the comment above refers to the old mpsleep()
	 * call (still visible under #if 0 below); tsleep() does NOT
	 * release rf_sparet_wait_mutex, so the mutex appears to be held
	 * across the sleep -- confirm the RF mutex macros are benign
	 * here before relying on this.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO | PCATCH,
		       "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* Dequeue the daemon's response; note that req now points at a
	 * different structure than the one we were passed. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the install status is returned in the "fcol" field */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1433 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1434 * bp & passes it down.
1435 * any calls originating in the kernel must use non-blocking I/O
1436 * do some extra sanity checking to return "appropriate" error values for
1437 * certain conditions (to make some standard utilities work)
1438 */
/*
 * rf_DoAccessKernel: wrapper around rf_DoAccess() for requests arriving
 * via a struct buf (i.e. through raidstrategy()).
 *
 * raidPtr - array the access is directed at
 * bp      - buffer describing the transfer; b_blkno is partition-relative
 *           and is never modified here
 * flags   - RAIDframe access flags (RF_DAG_NONBLOCKING_IO is OR'd in)
 * cbFunc  - completion callback passed down to RAIDframe
 * cbArg   - argument for cbFunc
 *
 * Returns 0 or an errno.  On a failed sanity check the buf is marked
 * B_ERROR and biodone()'d before returning.
 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* pb is 1 if the byte count is not an exact multiple of the
	 * sector size (such a request is rejected further below). */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): "1 ||" forces the debug printout unconditionally;
	 * presumably left over from debugging. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	/* Reject transfers that run off the end of the array; the
	 * "sum < x" comparisons also catch wraparound in the addition. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject transfers that aren't a whole number of sectors. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));

	/*
	 * XXX For now, all writes are sync
	 */
	do_async = 1;
	if ((bp->b_flags & B_READ) == 0)
		do_async = 0;

	/* don't ever condition on bp->b_flags & B_WRITE. always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	/*
	 * If we requested sync I/O, sleep here.
	 */
	/* NOTE(review): assumes the completion path does wakeup(bp) --
	 * confirm against the cbFunc actually passed in by callers. */
	if ((retcode == 0) && (do_async == 0))
		tsleep(bp, PRIBIO, "raidsyncio", 0);

	return (retcode);
}
1539 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1540
/*
 * rf_DispatchKernelIO: issue a single low-level component I/O described
 * by req on the given disk queue.  The disk queue is expected to be
 * locked by the caller.  Always returns 0.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;

	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* Clear any stale error state left in the buffer. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
	/* Allocate a shadow buf from the per-unit pool for the
	 * component-level I/O. */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the double parentheses suggest this was
		 * meant to be db1_printf; harmless as written. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* Complete immediately: no real I/O is issued for a NOP. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Set up the shadow buf to target the component's vnode
		 * and device; KernelWakeupFunc fires on completion. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Account for the pending write on the component vnode. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
1652 /* this is the callback function associated with a I/O invoked from
1653 kernel code.
1654 */
/*
 * KernelWakeupFunc: completion callback (via biodone/B_CALL) for the
 * component I/O issued by rf_DispatchKernelIO.  Propagates any error
 * from the shadow buf to the original buf, marks the component failed
 * on the first I/O error, updates disk accounting, and hands the
 * request back to RAIDframe.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* The shadow buf is the first member of struct raidbuf, so the
	 * cast recovers the containing structure. */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate an error on the component I/O to the original buf. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Charge the component's wait/IO time to the trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* Return the shadow buf to the per-unit pool. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* Notify the disk queue and complete the RAIDframe request,
	 * passing along whether the I/O ultimately failed. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1753
1754
1755
1756 /*
1757 * initialize a buf structure for doing an I/O in the kernel.
1758 */
/*
 * InitBP: fill in a buf structure for a kernel-initiated component I/O.
 * B_CALL makes biodone() invoke cbFunc (via b_iodone) when the transfer
 * completes.  The transfer size is numSect sectors, converted to bytes
 * with logBytesPerSector.
 *
 * NOTE(review): cbArg is accepted but never stored; the callback
 * context is recovered from the buf itself (see KernelWakeupFunc).
 */
static void
InitBP(
    struct buf * bp,
    struct vnode * b_vp,
    unsigned rw_flag,
    dev_t dev,
    RF_SectorNum_t startSect,
    RF_SectorCount_t numSect,
    caddr_t buf,
    void (*cbFunc) (struct buf *),
    void *cbArg,
    int logBytesPerSector,
    struct proc * b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	db1_printf(("bp->b_dev is %d\n", dev));
	bp->b_un.b_addr = buf;
#if 0
	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
#endif

	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
	/* A zero-length transfer would never call back; fail loudly. */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!\n");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

}
1796 /* Extras... */
1797
unsigned int
rpcc()
{
	/*
	 * Stub.  Presumably this was meant to read the CPU cycle
	 * counter; nothing here needs a real value, so report zero.
	 */
	return (0);
}
#if 0
/*
 * Dead code: an earlier stub of rf_GetSpareTableFromDaemon().  A
 * working implementation now exists earlier in this file; this copy is
 * kept disabled and should probably be removed.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1818
1819 static void
1820 raidgetdefaultlabel(raidPtr, rs, lp)
1821 RF_Raid_t *raidPtr;
1822 struct raid_softc *rs;
1823 struct disklabel *lp;
1824 {
1825 db1_printf(("Building a default label...\n"));
1826 bzero(lp, sizeof(*lp));
1827
1828 /* fabricate a label... */
1829 lp->d_secperunit = raidPtr->totalSectors;
1830 lp->d_secsize = raidPtr->bytesPerSector;
1831 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1832 lp->d_ntracks = 1;
1833 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1834 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1835
1836 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1837 lp->d_type = DTYPE_RAID;
1838 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1839 lp->d_rpm = 3600;
1840 lp->d_interleave = 1;
1841 lp->d_flags = 0;
1842
1843 lp->d_partitions[RAW_PART].p_offset = 0;
1844 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1845 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1846 lp->d_npartitions = RAW_PART + 1;
1847
1848 lp->d_magic = DISKMAGIC;
1849 lp->d_magic2 = DISKMAGIC;
1850 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1851
1852 }
1853 /*
1854 * Read the disklabel from the raid device. If one is not present, fake one
1855 * up.
1856 */
/*
 * raidgetdisklabel: read the disklabel from the raid device.  If a
 * valid one is not present, install a fabricated default label; if one
 * is found, sanity-check its geometry against the current array size.
 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Prime the label with fabricated defaults before trying to
	 * read the real one off the device. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and an old disklabel may be used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%d)\n", rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%d)\n",
				    rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
1910 /*
1911 * Take care of things one might want to take care of in the event
1912 * that a disklabel isn't present.
1913 */
1914 static void
1915 raidmakedisklabel(rs)
1916 struct raid_softc *rs;
1917 {
1918 struct disklabel *lp = rs->sc_dkdev.dk_label;
1919 db1_printf(("Making a label..\n"));
1920
1921 /*
1922 * For historical reasons, if there's no disklabel present
1923 * the raw partition must be marked FS_BSDFFS.
1924 */
1925
1926 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1927
1928 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1929
1930 lp->d_checksum = dkcksum(lp);
1931 }
1932 /*
1933 * Lookup the provided name in the filesystem. If the file exists,
1934 * is a valid block device, and isn't being used by anyone else,
1935 * set *vpp to the file's vnode.
1936 * You'll find the original of this in ccd.c
1937 */
1938 int
1939 raidlookup(path, p, vpp)
1940 char *path;
1941 struct proc *p;
1942 struct vnode **vpp; /* result */
1943 {
1944 struct nameidata nd;
1945 struct vnode *vp;
1946 struct vattr va;
1947 int error;
1948
1949 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1950 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1951 #ifdef DEBUG
1952 printf("RAIDframe: vn_open returned %d\n", error);
1953 #endif
1954 return (error);
1955 }
1956 vp = nd.ni_vp;
1957 if (vp->v_usecount > 1) {
1958 VOP_UNLOCK(vp, 0);
1959 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1960 return (EBUSY);
1961 }
1962 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1963 VOP_UNLOCK(vp, 0);
1964 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1965 return (error);
1966 }
1967 /* XXX: eventually we should handle VREG, too. */
1968 if (va.va_type != VBLK) {
1969 VOP_UNLOCK(vp, 0);
1970 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1971 return (ENOTBLK);
1972 }
1973 VOP_UNLOCK(vp, 0);
1974 *vpp = vp;
1975 return (0);
1976 }
1977 /*
1978 * Wait interruptibly for an exclusive lock.
1979 *
1980 * XXX
1981 * Several drivers do this; it should be abstracted and made MP-safe.
1982 * (Hmm... where have we seen this warning before :-> GO )
1983 */
1984 static int
1985 raidlock(rs)
1986 struct raid_softc *rs;
1987 {
1988 int error;
1989
1990 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1991 rs->sc_flags |= RAIDF_WANTED;
1992 if ((error =
1993 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1994 return (error);
1995 }
1996 rs->sc_flags |= RAIDF_LOCKED;
1997 return (0);
1998 }
1999 /*
2000 * Unlock and wake up any waiters.
2001 */
2002 static void
2003 raidunlock(rs)
2004 struct raid_softc *rs;
2005 {
2006
2007 rs->sc_flags &= ~RAIDF_LOCKED;
2008 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2009 rs->sc_flags &= ~RAIDF_WANTED;
2010 wakeup(rs);
2011 }
2012 }
2013
2014
2015 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2016 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2017
2018 int
2019 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2020 {
2021 RF_ComponentLabel_t component_label;
2022 raidread_component_label(dev, b_vp, &component_label);
2023 component_label.mod_counter = mod_counter;
2024 component_label.clean = RF_RAID_CLEAN;
2025 raidwrite_component_label(dev, b_vp, &component_label);
2026 return(0);
2027 }
2028
2029
2030 int
2031 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2032 {
2033 RF_ComponentLabel_t component_label;
2034 raidread_component_label(dev, b_vp, &component_label);
2035 component_label.mod_counter = mod_counter;
2036 component_label.clean = RF_RAID_DIRTY;
2037 raidwrite_component_label(dev, b_vp, &component_label);
2038 return(0);
2039 }
2040
2041 /* ARGSUSED */
2042 int
2043 raidread_component_label(dev, b_vp, component_label)
2044 dev_t dev;
2045 struct vnode *b_vp;
2046 RF_ComponentLabel_t *component_label;
2047 {
2048 struct buf *bp;
2049 int error;
2050
2051 /* XXX should probably ensure that we don't try to do this if
2052 someone has changed rf_protected_sectors. */
2053
2054 /* get a block of the appropriate size... */
2055 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2056 bp->b_dev = dev;
2057
2058 /* get our ducks in a row for the read */
2059 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2060 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2061 bp->b_flags = B_BUSY | B_READ;
2062 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2063
2064 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2065
2066 error = biowait(bp);
2067
2068 if (!error) {
2069 memcpy(component_label, bp->b_un.b_addr,
2070 sizeof(RF_ComponentLabel_t));
2071 #if 0
2072 printf("raidread_component_label: got component label:\n");
2073 printf("Version: %d\n",component_label->version);
2074 printf("Serial Number: %d\n",component_label->serial_number);
2075 printf("Mod counter: %d\n",component_label->mod_counter);
2076 printf("Row: %d\n", component_label->row);
2077 printf("Column: %d\n", component_label->column);
2078 printf("Num Rows: %d\n", component_label->num_rows);
2079 printf("Num Columns: %d\n", component_label->num_columns);
2080 printf("Clean: %d\n", component_label->clean);
2081 printf("Status: %d\n", component_label->status);
2082 #endif
2083 } else {
2084 printf("Failed to read RAID component label!\n");
2085 }
2086
2087 bp->b_flags = B_INVAL | B_AGE;
2088 brelse(bp);
2089 return(error);
2090 }
2091 /* ARGSUSED */
2092 int
2093 raidwrite_component_label(dev, b_vp, component_label)
2094 dev_t dev;
2095 struct vnode *b_vp;
2096 RF_ComponentLabel_t *component_label;
2097 {
2098 struct buf *bp;
2099 int error;
2100
2101 /* get a block of the appropriate size... */
2102 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2103 bp->b_dev = dev;
2104
2105 /* get our ducks in a row for the write */
2106 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2107 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2108 bp->b_flags = B_BUSY | B_WRITE;
2109 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2110
2111 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2112
2113 memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
2114
2115 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2116 error = biowait(bp);
2117 bp->b_flags = B_INVAL | B_AGE;
2118 brelse(bp);
2119 if (error) {
2120 printf("Failed to write RAID component info!\n");
2121 }
2122
2123 return(error);
2124 }
2125
2126 void
2127 rf_markalldirty( raidPtr )
2128 RF_Raid_t *raidPtr;
2129 {
2130 RF_ComponentLabel_t c_label;
2131 int r,c;
2132
2133 raidPtr->mod_counter++;
2134 for (r = 0; r < raidPtr->numRow; r++) {
2135 for (c = 0; c < raidPtr->numCol; c++) {
2136 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2137 raidread_component_label(
2138 raidPtr->Disks[r][c].dev,
2139 raidPtr->raid_cinfo[r][c].ci_vp,
2140 &c_label);
2141 if (c_label.status == rf_ds_spared) {
2142 /* XXX do something special...
2143 but whatever you do, don't
2144 try to access it!! */
2145 } else {
2146 #if 0
2147 c_label.status =
2148 raidPtr->Disks[r][c].status;
2149 raidwrite_component_label(
2150 raidPtr->Disks[r][c].dev,
2151 raidPtr->raid_cinfo[r][c].ci_vp,
2152 &c_label);
2153 #endif
2154 raidmarkdirty(
2155 raidPtr->Disks[r][c].dev,
2156 raidPtr->raid_cinfo[r][c].ci_vp,
2157 raidPtr->mod_counter);
2158 }
2159 }
2160 }
2161 }
2162 /* printf("Component labels marked dirty.\n"); */
2163 #if 0
2164 for( c = 0; c < raidPtr->numSpare ; c++) {
2165 sparecol = raidPtr->numCol + c;
2166 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2167 /*
2168
2169 XXX this is where we get fancy and map this spare
2170 into it's correct spot in the array.
2171
2172 */
2173 /*
2174
2175 we claim this disk is "optimal" if it's
2176 rf_ds_used_spare, as that means it should be
2177 directly substitutable for the disk it replaced.
2178 We note that too...
2179
2180 */
2181
2182 for(i=0;i<raidPtr->numRow;i++) {
2183 for(j=0;j<raidPtr->numCol;j++) {
2184 if ((raidPtr->Disks[i][j].spareRow ==
2185 r) &&
2186 (raidPtr->Disks[i][j].spareCol ==
2187 sparecol)) {
2188 srow = r;
2189 scol = sparecol;
2190 break;
2191 }
2192 }
2193 }
2194
2195 raidread_component_label(
2196 raidPtr->Disks[r][sparecol].dev,
2197 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2198 &c_label);
2199 /* make sure status is noted */
2200 c_label.version = RF_COMPONENT_LABEL_VERSION;
2201 c_label.mod_counter = raidPtr->mod_counter;
2202 c_label.serial_number = raidPtr->serial_number;
2203 c_label.row = srow;
2204 c_label.column = scol;
2205 c_label.num_rows = raidPtr->numRow;
2206 c_label.num_columns = raidPtr->numCol;
2207 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2208 c_label.status = rf_ds_optimal;
2209 raidwrite_component_label(
2210 raidPtr->Disks[r][sparecol].dev,
2211 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2212 &c_label);
2213 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2214 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2215 }
2216 }
2217
2218 #endif
2219 }
2220
2221
2222 void
2223 rf_update_component_labels( raidPtr )
2224 RF_Raid_t *raidPtr;
2225 {
2226 RF_ComponentLabel_t c_label;
2227 int sparecol;
2228 int r,c;
2229 int i,j;
2230 int srow, scol;
2231
2232 srow = -1;
2233 scol = -1;
2234
2235 /* XXX should do extra checks to make sure things really are clean,
2236 rather than blindly setting the clean bit... */
2237
2238 raidPtr->mod_counter++;
2239
2240 for (r = 0; r < raidPtr->numRow; r++) {
2241 for (c = 0; c < raidPtr->numCol; c++) {
2242 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2243 raidread_component_label(
2244 raidPtr->Disks[r][c].dev,
2245 raidPtr->raid_cinfo[r][c].ci_vp,
2246 &c_label);
2247 /* make sure status is noted */
2248 c_label.status = rf_ds_optimal;
2249 raidwrite_component_label(
2250 raidPtr->Disks[r][c].dev,
2251 raidPtr->raid_cinfo[r][c].ci_vp,
2252 &c_label);
2253 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2254 raidmarkclean(
2255 raidPtr->Disks[r][c].dev,
2256 raidPtr->raid_cinfo[r][c].ci_vp,
2257 raidPtr->mod_counter);
2258 }
2259 }
2260 /* else we don't touch it.. */
2261 #if 0
2262 else if (raidPtr->Disks[r][c].status !=
2263 rf_ds_failed) {
2264 raidread_component_label(
2265 raidPtr->Disks[r][c].dev,
2266 raidPtr->raid_cinfo[r][c].ci_vp,
2267 &c_label);
2268 /* make sure status is noted */
2269 c_label.status =
2270 raidPtr->Disks[r][c].status;
2271 raidwrite_component_label(
2272 raidPtr->Disks[r][c].dev,
2273 raidPtr->raid_cinfo[r][c].ci_vp,
2274 &c_label);
2275 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2276 raidmarkclean(
2277 raidPtr->Disks[r][c].dev,
2278 raidPtr->raid_cinfo[r][c].ci_vp,
2279 raidPtr->mod_counter);
2280 }
2281 }
2282 #endif
2283 }
2284 }
2285
2286 for( c = 0; c < raidPtr->numSpare ; c++) {
2287 sparecol = raidPtr->numCol + c;
2288 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2289 /*
2290
2291 we claim this disk is "optimal" if it's
2292 rf_ds_used_spare, as that means it should be
2293 directly substitutable for the disk it replaced.
2294 We note that too...
2295
2296 */
2297
2298 for(i=0;i<raidPtr->numRow;i++) {
2299 for(j=0;j<raidPtr->numCol;j++) {
2300 if ((raidPtr->Disks[i][j].spareRow ==
2301 0) &&
2302 (raidPtr->Disks[i][j].spareCol ==
2303 sparecol)) {
2304 srow = i;
2305 scol = j;
2306 break;
2307 }
2308 }
2309 }
2310
2311 raidread_component_label(
2312 raidPtr->Disks[0][sparecol].dev,
2313 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2314 &c_label);
2315 /* make sure status is noted */
2316 c_label.version = RF_COMPONENT_LABEL_VERSION;
2317 c_label.mod_counter = raidPtr->mod_counter;
2318 c_label.serial_number = raidPtr->serial_number;
2319 c_label.row = srow;
2320 c_label.column = scol;
2321 c_label.num_rows = raidPtr->numRow;
2322 c_label.num_columns = raidPtr->numCol;
2323 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2324 c_label.status = rf_ds_optimal;
2325 raidwrite_component_label(
2326 raidPtr->Disks[0][sparecol].dev,
2327 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2328 &c_label);
2329 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2330 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2331 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2332 raidPtr->mod_counter);
2333 }
2334 }
2335 }
2336 /* printf("Component labels updated\n"); */
2337 }
2338