rf_netbsdkintf.c revision 1.12 1 /* $NetBSD: rf_netbsdkintf.c,v 1.12 1999/03/02 03:18:49 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160
161 #ifdef DEBUG
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raid_shutdown(void *);
217
218 void raidattach __P((int));
219 int raidsize __P((dev_t));
220
221 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
222 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
223 static int raidinit __P((dev_t, RF_Raid_t *, int));
224
225 int raidopen __P((dev_t, int, int, struct proc *));
226 int raidclose __P((dev_t, int, int, struct proc *));
227 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
228 int raidwrite __P((dev_t, struct uio *, int));
229 int raidread __P((dev_t, struct uio *, int));
230 void raidstrategy __P((struct buf *));
231 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
232
233 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
234 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
235
236 /*
237 * Pilfered from ccd.c
238 */
239
/*
 * Per-component I/O wrapper, modelled on ccd.c's ccdbuf: embeds the buf
 * handed to the component disk driver and links it back to the original
 * buf and the RAIDframe request it belongs to.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
246
247
248 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
249 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
250
251 /* XXX Not sure if the following should be replacing the raidPtrs above,
252 or if it should be used in conjunction with that... */
253
/*
 * Per-unit software state, one entry per configured RAID device in the
 * global raid_softc[] array (indexed by unit number).
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* bits below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	void *sc_sdhook;	/* our shutdown hook */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
264 /* sc_flags */
265 #define RAIDF_INITED 0x01 /* unit has been initialized */
266 #define RAIDF_WLABEL 0x02 /* label area is writable */
267 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
268 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
269 #define RAIDF_LOCKED 0x80 /* unit is locked */
270
271 #define raidunit(x) DISKUNIT(x)
272 static int numraid = 0;
273
274 #define RAIDLABELDEV(dev) \
275 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
276
277 /* declared here, and made public, for the benefit of KVM stuff.. */
278 struct raid_softc *raid_softc;
279
280 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
281 struct disklabel *));
282 static void raidgetdisklabel __P((dev_t));
283 static void raidmakedisklabel __P((struct raid_softc *));
284
285 static int raidlock __P((struct raid_softc *));
286 static void raidunlock __P((struct raid_softc *));
287 int raidlookup __P((char *, struct proc * p, struct vnode **));
288
289 static void rf_markalldirty __P((RF_Raid_t *));
290
/*
 * raidattach -- pseudo-device attach routine.
 *
 * Called once at boot with the number of RAID units requested in the
 * kernel configuration.  Allocates the global raidPtrs[] descriptor
 * array and the raid_softc[] state array, and boots the RAIDframe core
 * via rf_boot().  Returns nothing; if the softc allocation fails the
 * driver is left unconfigured (numraid stays 0, so all later entry
 * points reject every unit).
 */
void
raidattach(num)
	int num;
{
	int raidID;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	/* Make some space for requested number of units... */

	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!\n");
	}
	/* Boot the RAIDframe core; a nonzero return is fatal. */
	rf_kbooted = rf_boot();
	if (rf_kbooted) {
		panic("Serious error booting RAID!!\n");
	}
	rf_kbooted = RFK_BOOT_GOOD;

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
	    M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}
	/* numraid gates the unit-number range checks in every entry point. */
	numraid = num;
	bzero(raid_softc, num * sizeof(struct raid_softc));

	/* Allocate one RF_Raid_t descriptor per unit up front. */
	for (raidID = 0; raidID < num; raidID++) {
		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
		    (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			printf("raidPtrs[%d] is NULL\n", raidID);
		}
	}
}
342
343
344 int
345 raidsize(dev)
346 dev_t dev;
347 {
348 struct raid_softc *rs;
349 struct disklabel *lp;
350 int part, unit, omask, size;
351
352 unit = raidunit(dev);
353 if (unit >= numraid)
354 return (-1);
355 rs = &raid_softc[unit];
356
357 if ((rs->sc_flags & RAIDF_INITED) == 0)
358 return (-1);
359
360 part = DISKPART(dev);
361 omask = rs->sc_dkdev.dk_openmask & (1 << part);
362 lp = rs->sc_dkdev.dk_label;
363
364 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
365 return (-1);
366
367 if (lp->d_partitions[part].p_fstype != FS_SWAP)
368 size = -1;
369 else
370 size = lp->d_partitions[part].p_size *
371 (lp->d_secsize / DEV_BSIZE);
372
373 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
374 return (-1);
375
376 return (size);
377
378 }
379
380 int
381 raiddump(dev, blkno, va, size)
382 dev_t dev;
383 daddr_t blkno;
384 caddr_t va;
385 size_t size;
386 {
387 /* Not implemented. */
388 return ENXIO;
389 }
/* ARGSUSED */
/*
 * raidopen -- open entry point for both block and character devices.
 *
 * Validates the unit and partition, re-reads the disklabel on the
 * first open of a configured unit, and records the open in the per-mode
 * open masks so the unit cannot be unconfigured while in use.
 *
 * Returns 0 on success, ENXIO for a bad unit/partition, or the error
 * from raidlock()/rf_boot().
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	unsigned int raidID;
	int rc;
	int error = 0;

	/* This whole next chunk of code is somewhat suspect... Not sure it's
	 * needed here at all... XXX */

	/* Late boot of the RAIDframe core if raidattach() never ran it. */
	if (rf_kbooted == RFK_BOOT_NONE) {
		printf("Doing restart on raidopen.\n");
		rf_kbooted = RFK_BOOT_GOOD;
		rc = rf_boot();
		if (rc) {
			rf_kbooted = RFK_BOOT_BAD;
			printf("Someone is unhappy...\n");
			return (rc);
		}
	}
	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Hold the unit lock across the label/open-mask manipulation. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	raidID = raidunit(dev);	/* same value as unit; used for debug only */

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
	    raidID, part));


	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/* Note: the INITED check short-circuits before lp is
		 * dereferenced, so an unconfigured unit is safe here. */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
471 /* ARGSUSED */
472 int
473 raidclose(dev, flags, fmt, p)
474 dev_t dev;
475 int flags, fmt;
476 struct proc *p;
477 {
478 int unit = raidunit(dev);
479 struct raid_softc *rs;
480 int error = 0;
481 int part;
482
483 if (unit >= numraid)
484 return (ENXIO);
485 rs = &raid_softc[unit];
486
487 if ((error = raidlock(rs)) != 0)
488 return (error);
489
490 part = DISKPART(dev);
491
492 /* ...that much closer to allowing unconfiguration... */
493 switch (fmt) {
494 case S_IFCHR:
495 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
496 break;
497
498 case S_IFBLK:
499 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
500 break;
501 }
502 rs->sc_dkdev.dk_openmask =
503 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
504
505 raidunlock(rs);
506 return (0);
507
508 }
509
510 void
511 raidstrategy(bp)
512 register struct buf *bp;
513 {
514 register int s;
515
516 unsigned int raidID = raidunit(bp->b_dev);
517 RF_Raid_t *raidPtr;
518 struct raid_softc *rs = &raid_softc[raidID];
519 struct disklabel *lp;
520 int wlabel;
521
522 #if 0
523 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
524 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
525 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
526 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
527 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
528
529 if (bp->b_flags & B_READ)
530 db1_printf(("READ\n"));
531 else
532 db1_printf(("WRITE\n"));
533 #endif
534 if (rf_kbooted != RFK_BOOT_GOOD)
535 return;
536 if (raidID >= numraid || !raidPtrs[raidID]) {
537 bp->b_error = ENODEV;
538 bp->b_flags |= B_ERROR;
539 bp->b_resid = bp->b_bcount;
540 biodone(bp);
541 return;
542 }
543 raidPtr = raidPtrs[raidID];
544 if (!raidPtr->valid) {
545 bp->b_error = ENODEV;
546 bp->b_flags |= B_ERROR;
547 bp->b_resid = bp->b_bcount;
548 biodone(bp);
549 return;
550 }
551 if (bp->b_bcount == 0) {
552 db1_printf(("b_bcount is zero..\n"));
553 biodone(bp);
554 return;
555 }
556 lp = rs->sc_dkdev.dk_label;
557
558 /*
559 * Do bounds checking and adjust transfer. If there's an
560 * error, the bounds check will flag that for us.
561 */
562
563 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
564 if (DISKPART(bp->b_dev) != RAW_PART)
565 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
566 db1_printf(("Bounds check failed!!:%d %d\n",
567 (int) bp->b_blkno, (int) wlabel));
568 biodone(bp);
569 return;
570 }
571 s = splbio(); /* XXX Needed? */
572 db1_printf(("Beginning strategy...\n"));
573
574 bp->b_resid = 0;
575 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
576 NULL, NULL, NULL);
577 if (bp->b_error) {
578 bp->b_flags |= B_ERROR;
579 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
580 bp->b_error));
581 }
582 splx(s);
583 #if 0
584 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
585 bp, bp->b_data,
586 (int) bp->b_bcount, (int) bp->b_resid));
587 #endif
588 }
589 /* ARGSUSED */
590 int
591 raidread(dev, uio, flags)
592 dev_t dev;
593 struct uio *uio;
594 int flags;
595 {
596 int unit = raidunit(dev);
597 struct raid_softc *rs;
598 int part;
599
600 if (unit >= numraid)
601 return (ENXIO);
602 rs = &raid_softc[unit];
603
604 if ((rs->sc_flags & RAIDF_INITED) == 0)
605 return (ENXIO);
606 part = DISKPART(dev);
607
608 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
609
610 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
611
612 }
613 /* ARGSUSED */
614 int
615 raidwrite(dev, uio, flags)
616 dev_t dev;
617 struct uio *uio;
618 int flags;
619 {
620 int unit = raidunit(dev);
621 struct raid_softc *rs;
622
623 if (unit >= numraid)
624 return (ENXIO);
625 rs = &raid_softc[unit];
626
627 if ((rs->sc_flags & RAIDF_INITED) == 0)
628 return (ENXIO);
629 db1_printf(("raidwrite\n"));
630 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
631
632 }
633
634 int
635 raidioctl(dev, cmd, data, flag, p)
636 dev_t dev;
637 u_long cmd;
638 caddr_t data;
639 int flag;
640 struct proc *p;
641 {
642 int unit = raidunit(dev);
643 int error = 0;
644 int part, pmask;
645 struct raid_softc *rs;
646 #if 0
647 int r, c;
648 #endif
649 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
650
651 /* struct ccdbuf *cbp; */
652 /* struct raidbuf *raidbp; */
653 RF_Config_t *k_cfg, *u_cfg;
654 u_char *specific_buf;
655 int retcode = 0;
656 int row;
657 int column;
658 struct rf_recon_req *rrcopy, *rr;
659 RF_ComponentLabel_t *component_label;
660 RF_ComponentLabel_t ci_label;
661 RF_ComponentLabel_t **c_label_ptr;
662 RF_SingleComponent_t *sparePtr,*componentPtr;
663 RF_SingleComponent_t hot_spare;
664 RF_SingleComponent_t component;
665
666 if (unit >= numraid)
667 return (ENXIO);
668 rs = &raid_softc[unit];
669
670 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
671 (int) DISKPART(dev), (int) unit, (int) cmd));
672
673 /* Must be open for writes for these commands... */
674 switch (cmd) {
675 case DIOCSDINFO:
676 case DIOCWDINFO:
677 case DIOCWLABEL:
678 if ((flag & FWRITE) == 0)
679 return (EBADF);
680 }
681
682 /* Must be initialized for these... */
683 switch (cmd) {
684 case DIOCGDINFO:
685 case DIOCSDINFO:
686 case DIOCWDINFO:
687 case DIOCGPART:
688 case DIOCWLABEL:
689 case DIOCGDEFLABEL:
690 case RAIDFRAME_SHUTDOWN:
691 case RAIDFRAME_REWRITEPARITY:
692 case RAIDFRAME_GET_INFO:
693 case RAIDFRAME_RESET_ACCTOTALS:
694 case RAIDFRAME_GET_ACCTOTALS:
695 case RAIDFRAME_KEEP_ACCTOTALS:
696 case RAIDFRAME_GET_SIZE:
697 case RAIDFRAME_FAIL_DISK:
698 case RAIDFRAME_COPYBACK:
699 case RAIDFRAME_CHECKRECON:
700 case RAIDFRAME_GET_COMPONENT_LABEL:
701 case RAIDFRAME_SET_COMPONENT_LABEL:
702 case RAIDFRAME_ADD_HOT_SPARE:
703 case RAIDFRAME_REMOVE_HOT_SPARE:
704 case RAIDFRAME_INIT_LABELS:
705 case RAIDFRAME_REBUILD_IN_PLACE:
706 if ((rs->sc_flags & RAIDF_INITED) == 0)
707 return (ENXIO);
708 }
709
710 switch (cmd) {
711
712
713 /* configure the system */
714 case RAIDFRAME_CONFIGURE:
715
716 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
717 /* copy-in the configuration information */
718 /* data points to a pointer to the configuration structure */
719 u_cfg = *((RF_Config_t **) data);
720 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
721 if (k_cfg == NULL) {
722 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
723 return (ENOMEM);
724 }
725 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
726 sizeof(RF_Config_t));
727 if (retcode) {
728 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
729 retcode));
730 return (retcode);
731 }
732 /* allocate a buffer for the layout-specific data, and copy it
733 * in */
734 if (k_cfg->layoutSpecificSize) {
735 if (k_cfg->layoutSpecificSize > 10000) {
736 /* sanity check */
737 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
738 return (EINVAL);
739 }
740 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
741 (u_char *));
742 if (specific_buf == NULL) {
743 RF_Free(k_cfg, sizeof(RF_Config_t));
744 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
745 return (ENOMEM);
746 }
747 retcode = copyin(k_cfg->layoutSpecific,
748 (caddr_t) specific_buf,
749 k_cfg->layoutSpecificSize);
750 if (retcode) {
751 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
752 retcode));
753 return (retcode);
754 }
755 } else
756 specific_buf = NULL;
757 k_cfg->layoutSpecific = specific_buf;
758
759 /* should do some kind of sanity check on the configuration.
760 * Store the sum of all the bytes in the last byte? */
761
762 #if 0
763 db1_printf(("Considering configuring the system.:%d 0x%x\n",
764 unit, p));
765 #endif
766
767 /* We need the pointer to this a little deeper, so stash it
768 * here... */
769
770 raidPtrs[unit]->proc = p;
771
772 /* configure the system */
773
774 raidPtrs[unit]->raidid = unit;
775 retcode = rf_Configure(raidPtrs[unit], k_cfg);
776
777
778 if (retcode == 0) {
779 retcode = raidinit(dev, raidPtrs[unit], unit);
780 rf_markalldirty( raidPtrs[unit] );
781 /* register our shutdown hook */
782 if ((rs->sc_sdhook =
783 shutdownhook_establish(raid_shutdown,
784 raidPtrs[unit])) == NULL) {
785 printf("raid%d: WARNING: unable to establish shutdown hook\n",raidPtrs[unit]->raidid);
786 }
787
788
789 }
790 /* free the buffers. No return code here. */
791 if (k_cfg->layoutSpecificSize) {
792 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
793 }
794 RF_Free(k_cfg, sizeof(RF_Config_t));
795
796 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
797 retcode));
798
799 return (retcode);
800
801 /* shutdown the system */
802 case RAIDFRAME_SHUTDOWN:
803
804 if ((error = raidlock(rs)) != 0)
805 return (error);
806
807 /*
808 * If somebody has a partition mounted, we shouldn't
809 * shutdown.
810 */
811
812 part = DISKPART(dev);
813 pmask = (1 << part);
814 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
815 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
816 (rs->sc_dkdev.dk_copenmask & pmask))) {
817 raidunlock(rs);
818 return (EBUSY);
819 }
820
821 if (rf_debugKernelAccess) {
822 printf("call shutdown\n");
823 }
824 raidPtrs[unit]->proc = p; /* XXX necessary evil */
825
826 retcode = rf_Shutdown(raidPtrs[unit]);
827
828 db1_printf(("Done main shutdown\n"));
829
830 pool_destroy(&rs->sc_cbufpool);
831 db1_printf(("Done freeing component buffer freelist\n"));
832
833 /* It's no longer initialized... */
834 rs->sc_flags &= ~RAIDF_INITED;
835
836 shutdownhook_disestablish( rs->sc_sdhook );
837 rs->sc_sdhook = NULL;
838
839 /* Detach the disk. */
840 disk_detach(&rs->sc_dkdev);
841
842 raidunlock(rs);
843
844 return (retcode);
845 case RAIDFRAME_GET_COMPONENT_LABEL:
846 c_label_ptr = (RF_ComponentLabel_t **) data;
847 /* need to read the component label for the disk indicated
848 by row,column in component_label
849 XXX need to sanity check these values!!!
850 */
851
852 /* For practice, let's get it directly fromdisk, rather
853 than from the in-core copy */
854 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
855 (RF_ComponentLabel_t *));
856 if (component_label == NULL)
857 return (ENOMEM);
858
859 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
860
861 retcode = copyin( *c_label_ptr, component_label,
862 sizeof(RF_ComponentLabel_t));
863
864 if (retcode) {
865 return(retcode);
866 }
867
868 row = component_label->row;
869 printf("Row: %d\n",row);
870 if (row > raidPtrs[unit]->numRow) {
871 row = 0; /* XXX */
872 }
873 column = component_label->column;
874 printf("Column: %d\n",column);
875 if (column > raidPtrs[unit]->numCol) {
876 column = 0; /* XXX */
877 }
878
879 raidread_component_label(
880 raidPtrs[unit]->Disks[row][column].dev,
881 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
882 component_label );
883
884 retcode = copyout((caddr_t) component_label,
885 (caddr_t) *c_label_ptr,
886 sizeof(RF_ComponentLabel_t));
887 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
888 return (retcode);
889
890 case RAIDFRAME_SET_COMPONENT_LABEL:
891 component_label = (RF_ComponentLabel_t *) data;
892
893 /* XXX check the label for valid stuff... */
894 /* Note that some things *should not* get modified --
895 the user should be re-initing the labels instead of
896 trying to patch things.
897 */
898
899 printf("Got component label:\n");
900 printf("Version: %d\n",component_label->version);
901 printf("Serial Number: %d\n",component_label->serial_number);
902 printf("Mod counter: %d\n",component_label->mod_counter);
903 printf("Row: %d\n", component_label->row);
904 printf("Column: %d\n", component_label->column);
905 printf("Num Rows: %d\n", component_label->num_rows);
906 printf("Num Columns: %d\n", component_label->num_columns);
907 printf("Clean: %d\n", component_label->clean);
908 printf("Status: %d\n", component_label->status);
909
910 row = component_label->row;
911 column = component_label->column;
912
913 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
914 (column < 0) || (column > raidPtrs[unit]->numCol)) {
915 return(EINVAL);
916 }
917
918 /* XXX this isn't allowed to do anything for now :-) */
919 #if 0
920 raidwrite_component_label(
921 raidPtrs[unit]->Disks[row][column].dev,
922 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
923 component_label );
924 #endif
925 return (0);
926
927 case RAIDFRAME_INIT_LABELS:
928 component_label = (RF_ComponentLabel_t *) data;
929 /*
930 we only want the serial number from
931 the above. We get all the rest of the information
932 from the config that was used to create this RAID
933 set.
934 */
935
936 raidPtrs[unit]->serial_number = component_label->serial_number;
937 /* current version number */
938 ci_label.version = RF_COMPONENT_LABEL_VERSION;
939 ci_label.serial_number = component_label->serial_number;
940 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
941 ci_label.num_rows = raidPtrs[unit]->numRow;
942 ci_label.num_columns = raidPtrs[unit]->numCol;
943 ci_label.clean = RF_RAID_DIRTY; /* not clean */
944 ci_label.status = rf_ds_optimal; /* "It's good!" */
945
946 for(row=0;row<raidPtrs[unit]->numRow;row++) {
947 ci_label.row = row;
948 for(column=0;column<raidPtrs[unit]->numCol;column++) {
949 ci_label.column = column;
950 raidwrite_component_label(
951 raidPtrs[unit]->Disks[row][column].dev,
952 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
953 &ci_label );
954 }
955 }
956
957 return (retcode);
958
959 /* initialize all parity */
960 case RAIDFRAME_REWRITEPARITY:
961
962 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0)
963 return (EINVAL);
964 /* borrow the thread of the requesting process */
965 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
966 retcode = rf_RewriteParity(raidPtrs[unit]);
967 /* return I/O Error if the parity rewrite fails */
968
969 if (retcode) {
970 retcode = EIO;
971 } else {
972 /* set the clean bit! If we shutdown correctly,
973 the clean bit on each component label will get
974 set */
975 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
976 }
977 return (retcode);
978
979
980 case RAIDFRAME_ADD_HOT_SPARE:
981 sparePtr = (RF_SingleComponent_t *) data;
982 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
983 printf("Adding spare\n");
984 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
985 return(retcode);
986
987 case RAIDFRAME_REMOVE_HOT_SPARE:
988 return(retcode);
989
990 case RAIDFRAME_REBUILD_IN_PLACE:
991 componentPtr = (RF_SingleComponent_t *) data;
992 memcpy( &component, componentPtr,
993 sizeof(RF_SingleComponent_t));
994 row = component.row;
995 column = component.column;
996 printf("Rebuild: %d %d\n",row, column);
997 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
998 (column < 0) || (column > raidPtrs[unit]->numCol)) {
999 return(EINVAL);
1000 }
1001 printf("Attempting a rebuild in place\n");
1002 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1003 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1004 return(retcode);
1005
1006 /* issue a test-unit-ready through raidframe to the indicated
1007 * device */
1008 #if 0 /* XXX not supported yet (ever?) */
1009 case RAIDFRAME_TUR:
1010 /* debug only */
1011 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1012 return (retcode);
1013 #endif
1014 case RAIDFRAME_GET_INFO:
1015 {
1016 RF_Raid_t *raid = raidPtrs[unit];
1017 RF_DeviceConfig_t *cfg, **ucfgp;
1018 int i, j, d;
1019
1020 if (!raid->valid)
1021 return (ENODEV);
1022 ucfgp = (RF_DeviceConfig_t **) data;
1023 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1024 (RF_DeviceConfig_t *));
1025 if (cfg == NULL)
1026 return (ENOMEM);
1027 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1028 cfg->rows = raid->numRow;
1029 cfg->cols = raid->numCol;
1030 cfg->ndevs = raid->numRow * raid->numCol;
1031 if (cfg->ndevs >= RF_MAX_DISKS) {
1032 cfg->ndevs = 0;
1033 return (ENOMEM);
1034 }
1035 cfg->nspares = raid->numSpare;
1036 if (cfg->nspares >= RF_MAX_DISKS) {
1037 cfg->nspares = 0;
1038 return (ENOMEM);
1039 }
1040 cfg->maxqdepth = raid->maxQueueDepth;
1041 d = 0;
1042 for (i = 0; i < cfg->rows; i++) {
1043 for (j = 0; j < cfg->cols; j++) {
1044 cfg->devs[d] = raid->Disks[i][j];
1045 d++;
1046 }
1047 }
1048 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1049 cfg->spares[i] = raid->Disks[0][j];
1050 }
1051 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1052 sizeof(RF_DeviceConfig_t));
1053 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1054
1055 return (retcode);
1056 }
1057 break;
1058
1059 case RAIDFRAME_RESET_ACCTOTALS:
1060 {
1061 RF_Raid_t *raid = raidPtrs[unit];
1062
1063 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1064 return (0);
1065 }
1066 break;
1067
1068 case RAIDFRAME_GET_ACCTOTALS:
1069 {
1070 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1071 RF_Raid_t *raid = raidPtrs[unit];
1072
1073 *totals = raid->acc_totals;
1074 return (0);
1075 }
1076 break;
1077
1078 case RAIDFRAME_KEEP_ACCTOTALS:
1079 {
1080 RF_Raid_t *raid = raidPtrs[unit];
1081 int *keep = (int *) data;
1082
1083 raid->keep_acc_totals = *keep;
1084 return (0);
1085 }
1086 break;
1087
1088 case RAIDFRAME_GET_SIZE:
1089 *(int *) data = raidPtrs[unit]->totalSectors;
1090 return (0);
1091
1092 #define RAIDFRAME_RECON 1
1093 /* XXX The above should probably be set somewhere else!! GO */
1094 #if RAIDFRAME_RECON > 0
1095
1096 /* fail a disk & optionally start reconstruction */
1097 case RAIDFRAME_FAIL_DISK:
1098 rr = (struct rf_recon_req *) data;
1099
1100 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1101 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1102 return (EINVAL);
1103
1104 printf("raid%d: Failing the disk: row: %d col: %d\n",
1105 unit, rr->row, rr->col);
1106
1107 /* make a copy of the recon request so that we don't rely on
1108 * the user's buffer */
1109 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1110 bcopy(rr, rrcopy, sizeof(*rr));
1111 rrcopy->raidPtr = (void *) raidPtrs[unit];
1112
1113 LOCK_RECON_Q_MUTEX();
1114 rrcopy->next = recon_queue;
1115 recon_queue = rrcopy;
1116 wakeup(&recon_queue);
1117 UNLOCK_RECON_Q_MUTEX();
1118
1119 return (0);
1120
1121 /* invoke a copyback operation after recon on whatever disk
1122 * needs it, if any */
1123 case RAIDFRAME_COPYBACK:
1124 /* borrow the current thread to get this done */
1125 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1126 rf_CopybackReconstructedData(raidPtrs[unit]);
1127 return (0);
1128
1129 /* return the percentage completion of reconstruction */
1130 case RAIDFRAME_CHECKRECON:
1131 row = *(int *) data;
1132 if (row < 0 || row >= raidPtrs[unit]->numRow)
1133 return (EINVAL);
1134 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1135 *(int *) data = 100;
1136 else
1137 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1138 return (0);
1139
1140 /* the sparetable daemon calls this to wait for the kernel to
1141 * need a spare table. this ioctl does not return until a
1142 * spare table is needed. XXX -- calling mpsleep here in the
1143 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1144 * -- I should either compute the spare table in the kernel,
1145 * or have a different -- XXX XXX -- interface (a different
1146 * character device) for delivering the table -- XXX */
1147 #if 0
1148 case RAIDFRAME_SPARET_WAIT:
1149 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1150 while (!rf_sparet_wait_queue)
1151 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1152 waitreq = rf_sparet_wait_queue;
1153 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1154 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1155
1156 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1157
1158 RF_Free(waitreq, sizeof(*waitreq));
1159 return (0);
1160
1161
1162 /* wakes up a process waiting on SPARET_WAIT and puts an error
1163 * code in it that will cause the dameon to exit */
1164 case RAIDFRAME_ABORT_SPARET_WAIT:
1165 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1166 waitreq->fcol = -1;
1167 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1168 waitreq->next = rf_sparet_wait_queue;
1169 rf_sparet_wait_queue = waitreq;
1170 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1171 wakeup(&rf_sparet_wait_queue);
1172 return (0);
1173
1174 /* used by the spare table daemon to deliver a spare table
1175 * into the kernel */
1176 case RAIDFRAME_SEND_SPARET:
1177
1178 /* install the spare table */
1179 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1180
1181 /* respond to the requestor. the return status of the spare
1182 * table installation is passed in the "fcol" field */
1183 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1184 waitreq->fcol = retcode;
1185 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1186 waitreq->next = rf_sparet_resp_queue;
1187 rf_sparet_resp_queue = waitreq;
1188 wakeup(&rf_sparet_resp_queue);
1189 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1190
1191 return (retcode);
1192 #endif
1193
1194
1195 #endif /* RAIDFRAME_RECON > 0 */
1196
1197 default:
1198 break; /* fall through to the os-specific code below */
1199
1200 }
1201
1202 if (!raidPtrs[unit]->valid)
1203 return (EINVAL);
1204
1205 /*
1206 * Add support for "regular" device ioctls here.
1207 */
1208
1209 switch (cmd) {
1210 case DIOCGDINFO:
1211 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1212 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1213 break;
1214
1215 case DIOCGPART:
1216 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1217 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1218 ((struct partinfo *) data)->part =
1219 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1220 break;
1221
1222 case DIOCWDINFO:
1223 db1_printf(("DIOCWDINFO\n"));
1224 case DIOCSDINFO:
1225 db1_printf(("DIOCSDINFO\n"));
1226 if ((error = raidlock(rs)) != 0)
1227 return (error);
1228
1229 rs->sc_flags |= RAIDF_LABELLING;
1230
1231 error = setdisklabel(rs->sc_dkdev.dk_label,
1232 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1233 if (error == 0) {
1234 if (cmd == DIOCWDINFO)
1235 error = writedisklabel(RAIDLABELDEV(dev),
1236 raidstrategy, rs->sc_dkdev.dk_label,
1237 rs->sc_dkdev.dk_cpulabel);
1238 }
1239 rs->sc_flags &= ~RAIDF_LABELLING;
1240
1241 raidunlock(rs);
1242
1243 if (error)
1244 return (error);
1245 break;
1246
1247 case DIOCWLABEL:
1248 db1_printf(("DIOCWLABEL\n"));
1249 if (*(int *) data != 0)
1250 rs->sc_flags |= RAIDF_WLABEL;
1251 else
1252 rs->sc_flags &= ~RAIDF_WLABEL;
1253 break;
1254
1255 case DIOCGDEFLABEL:
1256 db1_printf(("DIOCGDEFLABEL\n"));
1257 raidgetdefaultlabel(raidPtrs[unit], rs,
1258 (struct disklabel *) data);
1259 break;
1260
1261 default:
1262 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1263 }
1264 return (retcode);
1265
1266 }
1267
1268
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: create the buffer pool for shadow bufs, attach the
   disk(9) structure, and record the device size.  Called once per unit
   after the RAIDframe core has configured the array.  Returns 0 (no
   failure paths are currently checked -- see XXX below). */


static int
raidinit(dev, raidPtr, unit)
	dev_t dev;		/* device number of the raid device */
	RF_Raid_t *raidPtr;	/* configured RAIDframe descriptor */
	int unit;		/* index into raid_softc[] */
{
	int retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* pool of raidbuf shadow-buffer headers used by
	 * rf_DispatchKernelIO() for component I/O */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
1312
1313 void
1314 raid_shutdown(arg)
1315 void *arg;
1316 {
1317 RF_Raid_t *raidPtr = arg;
1318 struct raid_softc *rs;
1319
1320 /* This is called by out shutdown hook.
1321 The lights are being turned out, so lets shutdown as
1322 gracefully as possible */
1323
1324 rs = &raid_softc[raidPtr->raidid];
1325
1326 printf("raid%d: shutdown hooks called\n",raidPtr->raidid);
1327 rf_Shutdown(raidPtr);
1328
1329 /* It's no longer initialized... */
1330 rs->sc_flags &= ~RAIDF_INITED;
1331
1332
1333 }
1334
1335
1336 /*********************************************************
1337 *
1338 * initialization code called at boot time (startup.c)
1339 *
1340 ********************************************************/
1341 int
1342 rf_boot()
1343 {
1344 int i, rc;
1345
1346 rc = rf_mutex_init(&rf_sparet_wait_mutex);
1347 if (rc) {
1348 RF_PANIC();
1349 }
1350
1351 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
1352 recon_queue = NULL;
1353
1354 for (i = 0; i < numraid; i++)
1355 raidPtrs[i] = NULL;
1356 rc = rf_BootRaidframe();
1357 if (rc == 0)
1358 printf("Kernelized RAIDframe activated\n");
1359 else
1360 rf_kbooted = RFK_BOOT_BAD;
1361 return (rc);
1362 }
1363
/*
 * Reconstruction kernel thread.  This kernel thread never exits.  It is
 * created once, and persists until the system reboots.  It sleeps until
 * raidioctl() (RAIDFRAME_FAIL_DISK) queues a request on recon_queue,
 * then fails the indicated disk and optionally runs reconstruction to
 * completion before looking for more work.
 */

void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* queue is empty: release the mutex before
			 * sleeping so the producer can enqueue and
			 * wake us via wakeup(&recon_queue) */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO | PCATCH,
			    "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the request was copied into kernel memory by the
		 * producer; we own it and must free it */
		RF_Free(req, sizeof(*req));
	}
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * Queues `req' on rf_sparet_wait_queue (consumed by the sparetable
 * daemon via RAIDFRAME_SPARET_WAIT), then sleeps until a response is
 * posted on rf_sparet_resp_queue.  Returns the status the daemon put
 * in the response's "fcol" field.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;	/* request; ownership passes to the daemon */
{
	int retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): unlike the old mpsleep() call below, tsleep()
	 * does NOT drop rf_sparet_wait_mutex across the sleep -- verify
	 * this cannot deadlock against the daemon side. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO | PCATCH,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* pop the response entry off the queue */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * On a range or alignment error the buffer is completed here with
 * biodone() and the errno is returned.  For reads the access runs
 * asynchronously (cbFunc fires on completion); for writes we sleep
 * until the completion callback wakes us on `bp'.
 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;	/* the array to access */
	struct buf *bp;		/* caller's buffer; b_blkno is partition-relative */
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);	/* completion callback */
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* range check: `pb' accounts for a trailing partial sector; the
	 * "sum < x" comparisons catch unsigned wraparound */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		/* access runs off the end of the array: fail it here */
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* reject transfers that aren't a whole number of sectors */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));

	/*
	 * XXX For now, all writes are sync
	 */
	do_async = 1;
	if ((bp->b_flags & B_READ) == 0)
		do_async = 0;

	/* don't ever condition on bp->b_flags & B_WRITE. always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	/*
	 * If we requested sync I/O, sleep here until the completion
	 * callback wakes us on `bp'.
	 */
	if ((retcode == 0) && (do_async == 0))
		tsleep(bp, PRIBIO, "raidsyncio", 0);

	return (retcode);
}
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry
 *
 * Takes one RAIDframe disk-queue request, wraps it in a shadow raidbuf
 * from the per-unit pool, and either issues it to the component device
 * via VOP_STRATEGY() (READ/WRITE) or completes it immediately (NOP).
 * KernelWakeupFunc() runs when the component I/O finishes.  Always
 * returns 0.
 */

int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;

	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state left in the caller's buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
	/* shadow buf carries the context KernelWakeupFunc() needs */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately; no device I/O is issued */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* aim the shadow buf at the component's vnode/device */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must account for themselves on the vnode */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs (via biodone on the shadow buf) when a component I/O issued by
   rf_DispatchKernelIO() completes.  Propagates errors to the original
   buffer, updates trace statistics, marks the component failed on
   I/O error, returns the shadow buf to the pool, and notifies the
   RAIDframe disk-queue/DAG layers.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;	/* actually the raidbuf's embedded rf_buf */
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;	/* the original, caller-visible buffer */
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate a component-I/O error to the original buffer */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* fold this I/O's elapsed time into the trace record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the shadow buf to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* notify the disk queue, then the DAG layer; CompleteFunc is what
	 * eventually wakes a sleeper doing sync I/O */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1760
1761
1762
/*
 * initialize a buf structure for doing an I/O in the kernel.
 *
 * bp       - the buf to fill in (typically a raidbuf's embedded rf_buf)
 * b_vp     - vnode of the component device the I/O targets
 * rw_flag  - B_READ or B_WRITE, possibly OR'd with other b_flags bits
 * cbFunc   - biodone callback (B_CALL); runs when the I/O completes
 * cbArg    - currently unused here (the callback recovers its context
 *            from the enclosing raidbuf instead)
 */
static void
InitBP(
    struct buf * bp,
    struct vnode * b_vp,
    unsigned rw_flag,
    dev_t dev,
    RF_SectorNum_t startSect,
    RF_SectorCount_t numSect,
    caddr_t buf,
    void (*cbFunc) (struct buf *),
    void *cbArg,
    int logBytesPerSector,
    struct proc * b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	db1_printf(("bp->b_dev is %d\n", dev));
	bp->b_un.b_addr = buf;
#if 0
	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
#endif

	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
	/* a zero-length transfer indicates a bug upstream */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!\n");
	}
	bp->b_proc = b_proc;
	/* B_CALL: biodone() invokes cbFunc when the I/O finishes */
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

}
1803 /* Extras... */
1804
/*
 * Stub for reading the CPU cycle counter.  No machine-dependent
 * counter is wired up on this port, so callers always get 0 and
 * must not rely on this for timing.
 */
unsigned int
rpcc()
{
	return (0);
}
#if 0
/* Dead code: an early stub of rf_GetSpareTableFromDaemon(), superseded
 * by the real implementation earlier in this file.  Kept under #if 0
 * for reference only; compiling it in would be a duplicate definition. */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1825
1826 static void
1827 raidgetdefaultlabel(raidPtr, rs, lp)
1828 RF_Raid_t *raidPtr;
1829 struct raid_softc *rs;
1830 struct disklabel *lp;
1831 {
1832 db1_printf(("Building a default label...\n"));
1833 bzero(lp, sizeof(*lp));
1834
1835 /* fabricate a label... */
1836 lp->d_secperunit = raidPtr->totalSectors;
1837 lp->d_secsize = raidPtr->bytesPerSector;
1838 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1839 lp->d_ntracks = 1;
1840 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1841 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1842
1843 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1844 lp->d_type = DTYPE_RAID;
1845 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1846 lp->d_rpm = 3600;
1847 lp->d_interleave = 1;
1848 lp->d_flags = 0;
1849
1850 lp->d_partitions[RAW_PART].p_offset = 0;
1851 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1852 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1853 lp->d_npartitions = RAW_PART + 1;
1854
1855 lp->d_magic = DISKMAGIC;
1856 lp->d_magic2 = DISKMAGIC;
1857 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1858
1859 }
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.  Starts from a fabricated default label, then tries to read a real
 * on-disk label; if one is found, sanity-checks it against the current
 * size of the array (the size can change when the interleave changes).
 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* seed dk_label with a fabricated default */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no valid on-disk label: install a minimal fake one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%d)\n", rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%d)\n",
				    rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.  Assumes the default label has
 * already been installed in dk_label by raidgetdefaultlabel(); this
 * only adjusts the raw partition type, pack name, and checksum.
 */
static void
raidmakedisklabel(rs)
	struct raid_softc *rs;
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	/* re-checksum since we just changed label contents */
	lp->d_checksum = dkcksum(lp);
}
/*
 * Lookup the provided name in the filesystem.  If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
 * You'll find the original of this in ccd.c
 *
 * Returns 0 with *vpp set (vnode open and unlocked) on success, or an
 * errno; on any failure path the vnode is unlocked and closed before
 * returning.
 */
int
raidlookup(path, p, vpp)
	char *path;		/* pathname of the candidate component */
	struct proc *p;		/* process whose credentials are used */
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int error;

	/* open for read/write; on success nd.ni_vp is returned locked */
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
#ifdef DEBUG
		printf("RAIDframe: vn_open returned %d\n", error);
#endif
		return (error);
	}
	vp = nd.ni_vp;
	/* refuse a component that is already open elsewhere */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	/* success: hand back the open, unlocked vnode */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
1984 /*
1985 * Wait interruptibly for an exclusive lock.
1986 *
1987 * XXX
1988 * Several drivers do this; it should be abstracted and made MP-safe.
1989 * (Hmm... where have we seen this warning before :-> GO )
1990 */
1991 static int
1992 raidlock(rs)
1993 struct raid_softc *rs;
1994 {
1995 int error;
1996
1997 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1998 rs->sc_flags |= RAIDF_WANTED;
1999 if ((error =
2000 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2001 return (error);
2002 }
2003 rs->sc_flags |= RAIDF_LOCKED;
2004 return (0);
2005 }
2006 /*
2007 * Unlock and wake up any waiters.
2008 */
2009 static void
2010 raidunlock(rs)
2011 struct raid_softc *rs;
2012 {
2013
2014 rs->sc_flags &= ~RAIDF_LOCKED;
2015 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2016 rs->sc_flags &= ~RAIDF_WANTED;
2017 wakeup(rs);
2018 }
2019 }
2020
2021
2022 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2023 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2024
2025 int
2026 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2027 {
2028 RF_ComponentLabel_t component_label;
2029 raidread_component_label(dev, b_vp, &component_label);
2030 component_label.mod_counter = mod_counter;
2031 component_label.clean = RF_RAID_CLEAN;
2032 raidwrite_component_label(dev, b_vp, &component_label);
2033 return(0);
2034 }
2035
2036
2037 int
2038 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2039 {
2040 RF_ComponentLabel_t component_label;
2041 raidread_component_label(dev, b_vp, &component_label);
2042 component_label.mod_counter = mod_counter;
2043 component_label.clean = RF_RAID_DIRTY;
2044 raidwrite_component_label(dev, b_vp, &component_label);
2045 return(0);
2046 }
2047
/* ARGSUSED */
/*
 * Read the RAIDframe component label from `dev'.  The label lives
 * RF_COMPONENT_INFO_OFFSET bytes into the component.  On success the
 * label is copied into *component_label and 0 is returned; otherwise
 * the biowait() error is returned and *component_label is untouched.
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;	/* unused here (see ARGSUSED) */
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is set here in DEV_BSIZE blocks while
	 * b_bcount is in bytes -- looks inconsistent; confirm what the
	 * component's strategy routine expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly via the component's block device */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* invalidate the buffer so stale label data is not cached */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/* ARGSUSED */
/*
 * Write *component_label to the component-label area of `dev'
 * (RF_COMPONENT_INFO_OFFSET bytes in).  The remainder of the
 * RF_COMPONENT_INFO_SIZE region is zero-filled.  Returns 0 on success
 * or the biowait() error.
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;	/* unused here (see ARGSUSED) */
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid in DEV_BSIZE blocks vs b_bcount in bytes
	 * -- same apparent unit mismatch as in the read path; verify. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero the whole region, then drop the label at its start */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* invalidate the buffer so stale label data is not cached */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2132
2133 void
2134 rf_markalldirty( raidPtr )
2135 RF_Raid_t *raidPtr;
2136 {
2137 RF_ComponentLabel_t c_label;
2138 int r,c;
2139
2140 raidPtr->mod_counter++;
2141 for (r = 0; r < raidPtr->numRow; r++) {
2142 for (c = 0; c < raidPtr->numCol; c++) {
2143 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2144 raidread_component_label(
2145 raidPtr->Disks[r][c].dev,
2146 raidPtr->raid_cinfo[r][c].ci_vp,
2147 &c_label);
2148 if (c_label.status == rf_ds_spared) {
2149 /* XXX do something special...
2150 but whatever you do, don't
2151 try to access it!! */
2152 } else {
2153 #if 0
2154 c_label.status =
2155 raidPtr->Disks[r][c].status;
2156 raidwrite_component_label(
2157 raidPtr->Disks[r][c].dev,
2158 raidPtr->raid_cinfo[r][c].ci_vp,
2159 &c_label);
2160 #endif
2161 raidmarkdirty(
2162 raidPtr->Disks[r][c].dev,
2163 raidPtr->raid_cinfo[r][c].ci_vp,
2164 raidPtr->mod_counter);
2165 }
2166 }
2167 }
2168 }
2169 #if 0
2170 for( c = 0; c < raidPtr->numSpare ; c++) {
2171 sparecol = raidPtr->numCol + c;
2172 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2173 /*
2174
2175 XXX this is where we get fancy and map this spare
2176 into it's correct spot in the array.
2177
2178 */
2179 /*
2180
2181 we claim this disk is "optimal" if it's
2182 rf_ds_used_spare, as that means it should be
2183 directly substitutable for the disk it replaced.
2184 We note that too...
2185
2186 */
2187
2188 for(i=0;i<raidPtr->numRow;i++) {
2189 for(j=0;j<raidPtr->numCol;j++) {
2190 if ((raidPtr->Disks[i][j].spareRow ==
2191 r) &&
2192 (raidPtr->Disks[i][j].spareCol ==
2193 sparecol)) {
2194 srow = r;
2195 scol = sparecol;
2196 break;
2197 }
2198 }
2199 }
2200
2201 raidread_component_label(
2202 raidPtr->Disks[r][sparecol].dev,
2203 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2204 &c_label);
2205 /* make sure status is noted */
2206 c_label.version = RF_COMPONENT_LABEL_VERSION;
2207 c_label.mod_counter = raidPtr->mod_counter;
2208 c_label.serial_number = raidPtr->serial_number;
2209 c_label.row = srow;
2210 c_label.column = scol;
2211 c_label.num_rows = raidPtr->numRow;
2212 c_label.num_columns = raidPtr->numCol;
2213 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2214 c_label.status = rf_ds_optimal;
2215 raidwrite_component_label(
2216 raidPtr->Disks[r][sparecol].dev,
2217 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2218 &c_label);
2219 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2220 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2221 }
2222 }
2223
2224 #endif
2225 }
2226
2227