ccd.c revision 1.143.10.3 1 /* $NetBSD: ccd.c,v 1.143.10.3 2014/08/20 00:03:35 tls Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * "Concatenated" disk driver.
72 *
73 * Notes on concurrency:
74 *
75 * => sc_dvlock serializes access to the device nodes, excluding block I/O.
76 *
77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats,
78 * sc_stop, sc_bufq and b_resid from master buffers.
79 *
80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to
81 * serialize I/O and configuration changes.
82 *
83 * => the in-core disk label does not change while the device is open.
84 *
85 * On memory consumption: ccd fans out I/O requests and so needs to
86 * allocate memory. If the system is desperately low on memory, we
87 * single thread I/O.
88 */
89
90 #include <sys/cdefs.h>
91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.143.10.3 2014/08/20 00:03:35 tls Exp $");
92
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/proc.h>
97 #include <sys/errno.h>
98 #include <sys/buf.h>
99 #include <sys/kmem.h>
100 #include <sys/pool.h>
101 #include <sys/module.h>
102 #include <sys/namei.h>
103 #include <sys/stat.h>
104 #include <sys/ioctl.h>
105 #include <sys/disklabel.h>
106 #include <sys/device.h>
107 #include <sys/disk.h>
108 #include <sys/syslog.h>
109 #include <sys/fcntl.h>
110 #include <sys/vnode.h>
111 #include <sys/conf.h>
112 #include <sys/mutex.h>
113 #include <sys/queue.h>
114 #include <sys/kauth.h>
115 #include <sys/kthread.h>
116 #include <sys/bufq.h>
117 #include <sys/sysctl.h>
118
119 #include <uvm/uvm_extern.h>
120
121 #include <dev/ccdvar.h>
122 #include <dev/dkvar.h>
123
124 #include <miscfs/specfs/specdev.h> /* for v_rdev */
125
/* Building with CCDDEBUG turns on the generic DEBUG code paths as well. */
#ifdef CCDDEBUG
#ifndef DEBUG
#define DEBUG
#endif
#endif

#ifdef DEBUG
/*
 * Bits tested against ccddebug to select which diagnostics get printed.
 */
#define CCDB_FOLLOW	0x01	/* function entry/exit traces */
#define CCDB_INIT	0x02	/* configuration and interleave setup */
#define CCDB_IO		0x04	/* per-component buffer details */
#define CCDB_LABEL	0x08	/* presumably disklabel handling; not used in this part of the file */
#define CCDB_VNODE	0x10	/* presumably vnode handling; not used in this part of the file */
int ccddebug = 0;		/* no diagnostics selected by default */
#endif

/* Map a dev_t onto the ccd unit number it refers to. */
#define ccdunit(x)	DISKUNIT(x)
140
141 struct ccdbuf {
142 struct buf cb_buf; /* new I/O buf */
143 struct buf *cb_obp; /* ptr. to original I/O buf */
144 struct ccd_softc *cb_sc; /* pointer to ccd softc */
145 int cb_comp; /* target component */
146 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */
147 };
148
149 /* component buffer pool */
150 static pool_cache_t ccd_cache;
151
152 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK)
153 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp)
154
155 #define CCDLABELDEV(dev) \
156 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART))
157
158 /* called by main() at boot time */
159 void ccdattach(int);
160
161 /* called by biodone() at interrupt time */
162 static void ccdiodone(struct buf *);
163
164 static void ccdinterleave(struct ccd_softc *);
165 static int ccdinit(struct ccd_softc *, char **, struct vnode **,
166 struct lwp *);
167 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *,
168 daddr_t, void *, long);
169 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *);
170 static void ccdgetdisklabel(dev_t);
171 static void ccdmakedisklabel(struct ccd_softc *);
172 static void ccdstart(struct ccd_softc *);
173 static void ccdthread(void *);
174 static struct ccd_softc *ccdget(int);
175
176 static dev_type_open(ccdopen);
177 static dev_type_close(ccdclose);
178 static dev_type_read(ccdread);
179 static dev_type_write(ccdwrite);
180 static dev_type_ioctl(ccdioctl);
181 static dev_type_strategy(ccdstrategy);
182 static dev_type_size(ccdsize);
183
184 const struct bdevsw ccd_bdevsw = {
185 .d_open = ccdopen,
186 .d_close = ccdclose,
187 .d_strategy = ccdstrategy,
188 .d_ioctl = ccdioctl,
189 .d_dump = nodump,
190 .d_psize = ccdsize,
191 .d_discard = nodiscard,
192 .d_flag = D_DISK | D_MPSAFE
193 };
194
195 const struct cdevsw ccd_cdevsw = {
196 .d_open = ccdopen,
197 .d_close = ccdclose,
198 .d_read = ccdread,
199 .d_write = ccdwrite,
200 .d_ioctl = ccdioctl,
201 .d_stop = nostop,
202 .d_tty = notty,
203 .d_poll = nopoll,
204 .d_mmap = nommap,
205 .d_kqfilter = nokqfilter,
206 .d_discard = nodiscard,
207 .d_flag = D_DISK | D_MPSAFE
208 };
209
210 #ifdef DEBUG
211 static void printiinfo(struct ccdiinfo *);
212 #endif
213
214 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds);
215 static kmutex_t ccd_lock;
216
217 static void
218 ccdminphys(struct buf *bp)
219 {
220 struct ccd_softc *cs;
221 long xmax;
222 int unit = ccdunit(bp->b_dev);
223
224 cs = ccdget(unit);
225
226 if (__predict_false(cs == NULL)) {
227 panic("minphys called on missing ccd unit %d", unit);
228 }
229 xmax = cs->sc_maxphys;
230
231 if (bp->b_bcount > xmax)
232 bp->b_bcount = xmax;
233 }
234
235 const struct dkdriver ccd_dkdriver = { ccdstrategy, ccdminphys };
236
237 static struct ccd_softc *
238 ccdcreate(int unit) {
239 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
240 if (sc == NULL) {
241 #ifdef DIAGNOSTIC
242 printf("%s: out of memory\n", __func__);
243 #endif
244 return NULL;
245 }
246 /* Initialize per-softc structures. */
247 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit);
248 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE);
249 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
250 cv_init(&sc->sc_stop, "ccdstop");
251 cv_init(&sc->sc_push, "ccdthr");
252 disk_init(&sc->sc_dkdev, sc->sc_xname, &ccd_dkdriver); /* XXX */
253 return sc;
254 }
255
256 static void
257 ccddestroy(struct ccd_softc *sc) {
258 mutex_obj_free(sc->sc_iolock);
259 mutex_exit(&sc->sc_dvlock);
260 mutex_destroy(&sc->sc_dvlock);
261 cv_destroy(&sc->sc_stop);
262 cv_destroy(&sc->sc_push);
263 disk_destroy(&sc->sc_dkdev);
264 kmem_free(sc, sizeof(*sc));
265 }
266
267 static struct ccd_softc *
268 ccdget(int unit) {
269 struct ccd_softc *sc;
270 if (unit < 0) {
271 #ifdef DIAGNOSTIC
272 panic("%s: unit %d!", __func__, unit);
273 #endif
274 return NULL;
275 }
276 mutex_enter(&ccd_lock);
277 LIST_FOREACH(sc, &ccds, sc_link) {
278 if (sc->sc_unit == unit) {
279 mutex_exit(&ccd_lock);
280 return sc;
281 }
282 }
283 mutex_exit(&ccd_lock);
284 if ((sc = ccdcreate(unit)) == NULL)
285 return NULL;
286 mutex_enter(&ccd_lock);
287 LIST_INSERT_HEAD(&ccds, sc, sc_link);
288 mutex_exit(&ccd_lock);
289 return sc;
290 }
291
292 static void
293 ccdput(struct ccd_softc *sc) {
294 mutex_enter(&ccd_lock);
295 LIST_REMOVE(sc, sc_link);
296 mutex_exit(&ccd_lock);
297 ccddestroy(sc);
298 }
299
300 /*
301 * Called by main() during pseudo-device attachment. All we need
302 * to do is allocate enough space for devices to be configured later.
303 */
304 void
305 ccdattach(int num)
306 {
307 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE);
308
309 /* Initialize the component buffer pool. */
310 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0,
311 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL);
312 }
313
314 static int
315 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp,
316 struct lwp *l)
317 {
318 struct ccdcinfo *ci = NULL;
319 int ix;
320 struct ccdgeom *ccg = &cs->sc_geom;
321 char *tmppath;
322 int error, path_alloced;
323 uint64_t psize, minsize;
324 unsigned secsize, maxsecsize;
325
326 #ifdef DEBUG
327 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
328 printf("%s: ccdinit\n", cs->sc_xname);
329 #endif
330
331 /* Allocate space for the component info. */
332 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo),
333 KM_SLEEP);
334 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
335
336 cs->sc_size = 0;
337 cs->sc_maxphys = MACHINE_MAXPHYS;
338
339 /*
340 * Verify that each component piece exists and record
341 * relevant information about it.
342 */
343 maxsecsize = 0;
344 minsize = 0;
345 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) {
346 ci = &cs->sc_cinfo[ix];
347 ci->ci_vp = vpp[ix];
348 struct disk *diskp;
349
350 /*
351 * Copy in the pathname of the component.
352 */
353 memset(tmppath, 0, MAXPATHLEN); /* sanity */
354 error = copyinstr(cpaths[ix], tmppath,
355 MAXPATHLEN, &ci->ci_pathlen);
356 if (ci->ci_pathlen == 0)
357 error = EINVAL;
358 if (error) {
359 #ifdef DEBUG
360 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
361 printf("%s: can't copy path, error = %d\n",
362 cs->sc_xname, error);
363 #endif
364 goto out;
365 }
366 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP);
367 memcpy(ci->ci_path, tmppath, ci->ci_pathlen);
368 path_alloced++;
369
370 /*
371 * XXX: Cache the component's dev_t.
372 */
373 ci->ci_dev = vpp[ix]->v_rdev;
374 if ((diskp = disk_find_blk(ci->ci_dev)) == NULL) {
375 panic("no disk for device %d %d", major(ci->ci_dev),
376 DISKUNIT(ci->ci_dev));
377 }
378 cs->sc_maxphys = MIN(cs->sc_maxphys, disk_maxphys(diskp));
379
380 /*
381 * Get partition information for the component.
382 */
383 error = getdisksize(vpp[ix], &psize, &secsize);
384 if (error) {
385 #ifdef DEBUG
386 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
387 printf("%s: %s: disksize failed, error = %d\n",
388 cs->sc_xname, ci->ci_path, error);
389 #endif
390 goto out;
391 }
392
393 /*
394 * Calculate the size, truncating to an interleave
395 * boundary if necessary.
396 */
397 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize;
398 if (cs->sc_ileave > 1)
399 psize -= psize % cs->sc_ileave;
400
401 if (psize == 0) {
402 #ifdef DEBUG
403 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
404 printf("%s: %s: size == 0\n",
405 cs->sc_xname, ci->ci_path);
406 #endif
407 error = ENODEV;
408 goto out;
409 }
410
411 if (minsize == 0 || psize < minsize)
412 minsize = psize;
413 ci->ci_size = psize;
414 cs->sc_size += psize;
415 }
416
417 /*
418 * Don't allow the interleave to be smaller than
419 * the biggest component sector.
420 */
421 if ((cs->sc_ileave > 0) &&
422 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
423 #ifdef DEBUG
424 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
425 printf("%s: interleave must be at least %d\n",
426 cs->sc_xname, (maxsecsize / DEV_BSIZE));
427 #endif
428 error = EINVAL;
429 goto out;
430 }
431
432 /*
433 * If uniform interleave is desired set all sizes to that of
434 * the smallest component.
435 */
436 if (cs->sc_flags & CCDF_UNIFORM) {
437 for (ci = cs->sc_cinfo;
438 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
439 ci->ci_size = minsize;
440
441 cs->sc_size = cs->sc_nccdisks * minsize;
442 }
443
444 /*
445 * Construct the interleave table.
446 */
447 ccdinterleave(cs);
448
449 /*
450 * Create pseudo-geometry based on 1MB cylinders. It's
451 * pretty close.
452 */
453 ccg->ccg_secsize = DEV_BSIZE;
454 ccg->ccg_ntracks = 1;
455 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize);
456 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
457
458 /*
459 * Create thread to handle deferred I/O.
460 */
461 cs->sc_zap = false;
462 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread,
463 cs, &cs->sc_thread, "%s", cs->sc_xname);
464 if (error) {
465 printf("ccdinit: can't create thread: %d\n", error);
466 goto out;
467 }
468
469 /*
470 * Only now that everything is set up can we enable the device.
471 */
472 mutex_enter(cs->sc_iolock);
473 cs->sc_flags |= CCDF_INITED;
474 mutex_exit(cs->sc_iolock);
475 kmem_free(tmppath, MAXPATHLEN);
476 return (0);
477
478 out:
479 for (ix = 0; ix < path_alloced; ix++) {
480 kmem_free(cs->sc_cinfo[ix].ci_path,
481 cs->sc_cinfo[ix].ci_pathlen);
482 }
483 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo));
484 kmem_free(tmppath, MAXPATHLEN);
485 return (error);
486 }
487
488 static void
489 ccdinterleave(struct ccd_softc *cs)
490 {
491 struct ccdcinfo *ci, *smallci;
492 struct ccdiinfo *ii;
493 daddr_t bn, lbn;
494 int ix;
495 u_long size;
496
497 #ifdef DEBUG
498 if (ccddebug & CCDB_INIT)
499 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave);
500 #endif
501 /*
502 * Allocate an interleave table.
503 * Chances are this is too big, but we don't care.
504 */
505 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
506 cs->sc_itable = kmem_zalloc(size, KM_SLEEP);
507
508 /*
509 * Trivial case: no interleave (actually interleave of disk size).
510 * Each table entry represents a single component in its entirety.
511 */
512 if (cs->sc_ileave == 0) {
513 bn = 0;
514 ii = cs->sc_itable;
515
516 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
517 /* Allocate space for ii_index. */
518 ii->ii_indexsz = sizeof(int);
519 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP);
520 ii->ii_ndisk = 1;
521 ii->ii_startblk = bn;
522 ii->ii_startoff = 0;
523 ii->ii_index[0] = ix;
524 bn += cs->sc_cinfo[ix].ci_size;
525 ii++;
526 }
527 ii->ii_ndisk = 0;
528 #ifdef DEBUG
529 if (ccddebug & CCDB_INIT)
530 printiinfo(cs->sc_itable);
531 #endif
532 return;
533 }
534
535 /*
536 * The following isn't fast or pretty; it doesn't have to be.
537 */
538 size = 0;
539 bn = lbn = 0;
540 for (ii = cs->sc_itable; ; ii++) {
541 /* Allocate space for ii_index. */
542 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks;
543 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP);
544
545 /*
546 * Locate the smallest of the remaining components
547 */
548 smallci = NULL;
549 for (ci = cs->sc_cinfo;
550 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
551 if (ci->ci_size > size &&
552 (smallci == NULL ||
553 ci->ci_size < smallci->ci_size))
554 smallci = ci;
555
556 /*
557 * Nobody left, all done
558 */
559 if (smallci == NULL) {
560 ii->ii_ndisk = 0;
561 break;
562 }
563
564 /*
565 * Record starting logical block and component offset
566 */
567 ii->ii_startblk = bn / cs->sc_ileave;
568 ii->ii_startoff = lbn;
569
570 /*
571 * Determine how many disks take part in this interleave
572 * and record their indices.
573 */
574 ix = 0;
575 for (ci = cs->sc_cinfo;
576 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
577 if (ci->ci_size >= smallci->ci_size)
578 ii->ii_index[ix++] = ci - cs->sc_cinfo;
579 ii->ii_ndisk = ix;
580 bn += ix * (smallci->ci_size - size);
581 lbn = smallci->ci_size / cs->sc_ileave;
582 size = smallci->ci_size;
583 }
584 #ifdef DEBUG
585 if (ccddebug & CCDB_INIT)
586 printiinfo(cs->sc_itable);
587 #endif
588 }
589
590 /* ARGSUSED */
591 static int
592 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l)
593 {
594 int unit = ccdunit(dev);
595 struct ccd_softc *cs;
596 struct disklabel *lp;
597 int error = 0, part, pmask;
598
599 #ifdef DEBUG
600 if (ccddebug & CCDB_FOLLOW)
601 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags);
602 #endif
603 if ((cs = ccdget(unit)) == NULL)
604 return ENXIO;
605
606 mutex_enter(&cs->sc_dvlock);
607
608 lp = cs->sc_dkdev.dk_label;
609
610 part = DISKPART(dev);
611 pmask = (1 << part);
612
613 /*
614 * If we're initialized, check to see if there are any other
615 * open partitions. If not, then it's safe to update
616 * the in-core disklabel. Only read the disklabel if it is
617 * not already valid.
618 */
619 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED &&
620 cs->sc_dkdev.dk_openmask == 0)
621 ccdgetdisklabel(dev);
622
623 /* Check that the partition exists. */
624 if (part != RAW_PART) {
625 if (((cs->sc_flags & CCDF_INITED) == 0) ||
626 ((part >= lp->d_npartitions) ||
627 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
628 error = ENXIO;
629 goto done;
630 }
631 }
632
633 /* Prevent our unit from being unconfigured while open. */
634 switch (fmt) {
635 case S_IFCHR:
636 cs->sc_dkdev.dk_copenmask |= pmask;
637 break;
638
639 case S_IFBLK:
640 cs->sc_dkdev.dk_bopenmask |= pmask;
641 break;
642 }
643 cs->sc_dkdev.dk_openmask =
644 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask;
645
646 done:
647 mutex_exit(&cs->sc_dvlock);
648 return (error);
649 }
650
651 /* ARGSUSED */
652 static int
653 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l)
654 {
655 int unit = ccdunit(dev);
656 struct ccd_softc *cs;
657 int part;
658
659 #ifdef DEBUG
660 if (ccddebug & CCDB_FOLLOW)
661 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags);
662 #endif
663
664 if ((cs = ccdget(unit)) == NULL)
665 return ENXIO;
666
667 mutex_enter(&cs->sc_dvlock);
668
669 part = DISKPART(dev);
670
671 /* ...that much closer to allowing unconfiguration... */
672 switch (fmt) {
673 case S_IFCHR:
674 cs->sc_dkdev.dk_copenmask &= ~(1 << part);
675 break;
676
677 case S_IFBLK:
678 cs->sc_dkdev.dk_bopenmask &= ~(1 << part);
679 break;
680 }
681 cs->sc_dkdev.dk_openmask =
682 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask;
683
684 if (cs->sc_dkdev.dk_openmask == 0) {
685 if ((cs->sc_flags & CCDF_KLABEL) == 0)
686 cs->sc_flags &= ~CCDF_VLABEL;
687 }
688
689 mutex_exit(&cs->sc_dvlock);
690 return (0);
691 }
692
693 static bool
694 ccdbackoff(struct ccd_softc *cs)
695 {
696
697 /* XXX Arbitrary, should be a uvm call. */
698 return uvmexp.free < (uvmexp.freemin >> 1) &&
699 disk_isbusy(&cs->sc_dkdev);
700 }
701
702 static void
703 ccdthread(void *cookie)
704 {
705 struct ccd_softc *cs;
706
707 cs = cookie;
708
709 #ifdef DEBUG
710 if (ccddebug & CCDB_FOLLOW)
711 printf("ccdthread: hello\n");
712 #endif
713
714 mutex_enter(cs->sc_iolock);
715 while (__predict_true(!cs->sc_zap)) {
716 if (bufq_peek(cs->sc_bufq) == NULL) {
717 /* Nothing to do. */
718 cv_wait(&cs->sc_push, cs->sc_iolock);
719 continue;
720 }
721 if (ccdbackoff(cs)) {
722 /* Wait for memory to become available. */
723 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1);
724 continue;
725 }
726 #ifdef DEBUG
727 if (ccddebug & CCDB_FOLLOW)
728 printf("ccdthread: dispatching I/O\n");
729 #endif
730 ccdstart(cs);
731 mutex_enter(cs->sc_iolock);
732 }
733 cs->sc_thread = NULL;
734 mutex_exit(cs->sc_iolock);
735 #ifdef DEBUG
736 if (ccddebug & CCDB_FOLLOW)
737 printf("ccdthread: goodbye\n");
738 #endif
739 kthread_exit(0);
740 }
741
742 static void
743 ccdstrategy(struct buf *bp)
744 {
745 int unit = ccdunit(bp->b_dev);
746 struct ccd_softc *cs;
747 if ((cs = ccdget(unit)) == NULL)
748 return;
749
750 /* Must be open or reading label. */
751 KASSERT(cs->sc_dkdev.dk_openmask != 0 ||
752 (cs->sc_flags & CCDF_RLABEL) != 0);
753
754 mutex_enter(cs->sc_iolock);
755 /* Synchronize with device init/uninit. */
756 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) {
757 mutex_exit(cs->sc_iolock);
758 #ifdef DEBUG
759 if (ccddebug & CCDB_FOLLOW)
760 printf("ccdstrategy: unit %d: not inited\n", unit);
761 #endif
762 bp->b_error = ENXIO;
763 bp->b_resid = bp->b_bcount;
764 biodone(bp);
765 return;
766 }
767
768 /* Defer to thread if system is low on memory. */
769 bufq_put(cs->sc_bufq, bp);
770 if (__predict_false(ccdbackoff(cs))) {
771 mutex_exit(cs->sc_iolock);
772 #ifdef DEBUG
773 if (ccddebug & CCDB_FOLLOW)
774 printf("ccdstrategy: holding off on I/O\n");
775 #endif
776 return;
777 }
778 ccdstart(cs);
779 }
780
781 static void
782 ccdstart(struct ccd_softc *cs)
783 {
784 daddr_t blkno;
785 int wlabel;
786 struct disklabel *lp;
787 long bcount, rcount;
788 struct ccdbuf *cbp;
789 char *addr;
790 daddr_t bn;
791 vnode_t *vp;
792 buf_t *bp;
793
794 KASSERT(mutex_owned(cs->sc_iolock));
795
796 disk_busy(&cs->sc_dkdev);
797 bp = bufq_get(cs->sc_bufq);
798 KASSERT(bp != NULL);
799
800 #ifdef DEBUG
801 if (ccddebug & CCDB_FOLLOW)
802 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp);
803 #endif
804
805 /* If it's a nil transfer, wake up the top half now. */
806 if (bp->b_bcount == 0)
807 goto done;
808
809 lp = cs->sc_dkdev.dk_label;
810
811 /*
812 * Do bounds checking and adjust transfer. If there's an
813 * error, the bounds check will flag that for us. Convert
814 * the partition relative block number to an absolute.
815 */
816 blkno = bp->b_blkno;
817 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
818 if (DISKPART(bp->b_dev) != RAW_PART) {
819 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0)
820 goto done;
821 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset;
822 }
823 mutex_exit(cs->sc_iolock);
824 bp->b_rawblkno = blkno;
825
826 /* Allocate the component buffers and start I/O! */
827 bp->b_resid = bp->b_bcount;
828 bn = bp->b_rawblkno;
829 addr = bp->b_data;
830 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
831 cbp = ccdbuffer(cs, bp, bn, addr, bcount);
832 rcount = cbp->cb_buf.b_bcount;
833 bn += btodb(rcount);
834 addr += rcount;
835 vp = cbp->cb_buf.b_vp;
836 if ((cbp->cb_buf.b_flags & B_READ) == 0) {
837 mutex_enter(vp->v_interlock);
838 vp->v_numoutput++;
839 mutex_exit(vp->v_interlock);
840 }
841 (void)VOP_STRATEGY(vp, &cbp->cb_buf);
842 }
843 return;
844
845 done:
846 disk_unbusy(&cs->sc_dkdev, 0, 0);
847 cv_broadcast(&cs->sc_stop);
848 cv_broadcast(&cs->sc_push);
849 mutex_exit(cs->sc_iolock);
850 bp->b_resid = bp->b_bcount;
851 biodone(bp);
852 }
853
854 /*
855 * Build a component buffer header.
856 */
857 static struct ccdbuf *
858 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr,
859 long bcount)
860 {
861 struct ccdcinfo *ci;
862 struct ccdbuf *cbp;
863 daddr_t cbn, cboff;
864 u_int64_t cbc;
865 int ccdisk;
866
867 #ifdef DEBUG
868 if (ccddebug & CCDB_IO)
869 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n",
870 cs, bp, bn, addr, bcount);
871 #endif
872 /*
873 * Determine which component bn falls in.
874 */
875 cbn = bn;
876 cboff = 0;
877
878 /*
879 * Serially concatenated
880 */
881 if (cs->sc_ileave == 0) {
882 daddr_t sblk;
883
884 sblk = 0;
885 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk];
886 cbn >= sblk + ci->ci_size;
887 ccdisk++, ci = &cs->sc_cinfo[ccdisk])
888 sblk += ci->ci_size;
889 cbn -= sblk;
890 }
891 /*
892 * Interleaved
893 */
894 else {
895 struct ccdiinfo *ii;
896 int off;
897
898 cboff = cbn % cs->sc_ileave;
899 cbn /= cs->sc_ileave;
900 for (ii = cs->sc_itable; ii->ii_ndisk; ii++)
901 if (ii->ii_startblk > cbn)
902 break;
903 ii--;
904 off = cbn - ii->ii_startblk;
905 if (ii->ii_ndisk == 1) {
906 ccdisk = ii->ii_index[0];
907 cbn = ii->ii_startoff + off;
908 } else {
909 ccdisk = ii->ii_index[off % ii->ii_ndisk];
910 cbn = ii->ii_startoff + off / ii->ii_ndisk;
911 }
912 cbn *= cs->sc_ileave;
913 ci = &cs->sc_cinfo[ccdisk];
914 }
915
916 /*
917 * Fill in the component buf structure.
918 */
919 cbp = CCD_GETBUF();
920 KASSERT(cbp != NULL);
921 buf_init(&cbp->cb_buf);
922 cbp->cb_buf.b_flags = bp->b_flags;
923 cbp->cb_buf.b_oflags = bp->b_oflags;
924 cbp->cb_buf.b_cflags = bp->b_cflags;
925 cbp->cb_buf.b_iodone = ccdiodone;
926 cbp->cb_buf.b_proc = bp->b_proc;
927 cbp->cb_buf.b_dev = ci->ci_dev;
928 cbp->cb_buf.b_blkno = cbn + cboff;
929 cbp->cb_buf.b_data = addr;
930 cbp->cb_buf.b_vp = ci->ci_vp;
931 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock;
932 if (cs->sc_ileave == 0)
933 cbc = dbtob((u_int64_t)(ci->ci_size - cbn));
934 else
935 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff));
936 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount;
937
938 /*
939 * context for ccdiodone
940 */
941 cbp->cb_obp = bp;
942 cbp->cb_sc = cs;
943 cbp->cb_comp = ccdisk;
944
945 BIO_COPYPRIO(&cbp->cb_buf, bp);
946
947 #ifdef DEBUG
948 if (ccddebug & CCDB_IO)
949 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p"
950 " bcnt %d\n",
951 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp,
952 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
953 cbp->cb_buf.b_bcount);
954 #endif
955
956 return (cbp);
957 }
958
959 /*
960 * Called at interrupt time.
961 * Mark the component as done and if all components are done,
962 * take a ccd interrupt.
963 */
964 static void
965 ccdiodone(struct buf *vbp)
966 {
967 struct ccdbuf *cbp = (struct ccdbuf *) vbp;
968 struct buf *bp = cbp->cb_obp;
969 struct ccd_softc *cs = cbp->cb_sc;
970 int count;
971
972 #ifdef DEBUG
973 if (ccddebug & CCDB_FOLLOW)
974 printf("ccdiodone(%p)\n", cbp);
975 if (ccddebug & CCDB_IO) {
976 printf("ccdiodone: bp %p bcount %d resid %d\n",
977 bp, bp->b_bcount, bp->b_resid);
978 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p"
979 " bcnt %d\n",
980 cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
981 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
982 cbp->cb_buf.b_bcount);
983 }
984 #endif
985
986 if (cbp->cb_buf.b_error != 0) {
987 bp->b_error = cbp->cb_buf.b_error;
988 printf("%s: error %d on component %d\n",
989 cs->sc_xname, bp->b_error, cbp->cb_comp);
990 }
991 count = cbp->cb_buf.b_bcount;
992 buf_destroy(&cbp->cb_buf);
993 CCD_PUTBUF(cbp);
994
995 /*
996 * If all done, "interrupt".
997 */
998 mutex_enter(cs->sc_iolock);
999 bp->b_resid -= count;
1000 if (bp->b_resid < 0)
1001 panic("ccdiodone: count");
1002 if (bp->b_resid == 0) {
1003 /*
1004 * Request is done for better or worse, wakeup the top half.
1005 */
1006 if (bp->b_error != 0)
1007 bp->b_resid = bp->b_bcount;
1008 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid),
1009 (bp->b_flags & B_READ));
1010 if (!disk_isbusy(&cs->sc_dkdev)) {
1011 if (bufq_peek(cs->sc_bufq) != NULL) {
1012 cv_broadcast(&cs->sc_push);
1013 }
1014 cv_broadcast(&cs->sc_stop);
1015 }
1016 mutex_exit(cs->sc_iolock);
1017 biodone(bp);
1018 } else
1019 mutex_exit(cs->sc_iolock);
1020 }
1021
1022 /* ARGSUSED */
1023 static int
1024 ccdread(dev_t dev, struct uio *uio, int flags)
1025 {
1026 int unit = ccdunit(dev);
1027 struct ccd_softc *cs;
1028
1029 #ifdef DEBUG
1030 if (ccddebug & CCDB_FOLLOW)
1031 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio);
1032 #endif
1033 if ((cs = ccdget(unit)) == NULL)
1034 return 0;
1035
1036 /* Unlocked advisory check, ccdstrategy check is synchronous. */
1037 if ((cs->sc_flags & CCDF_INITED) == 0)
1038 return (ENXIO);
1039
1040 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio));
1041 }
1042
1043 /* ARGSUSED */
1044 static int
1045 ccdwrite(dev_t dev, struct uio *uio, int flags)
1046 {
1047 int unit = ccdunit(dev);
1048 struct ccd_softc *cs;
1049
1050 #ifdef DEBUG
1051 if (ccddebug & CCDB_FOLLOW)
1052 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio);
1053 #endif
1054 if ((cs = ccdget(unit)) == NULL)
1055 return ENOENT;
1056
1057 /* Unlocked advisory check, ccdstrategy check is synchronous. */
1058 if ((cs->sc_flags & CCDF_INITED) == 0)
1059 return (ENXIO);
1060
1061 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio));
1062 }
1063
1064 static int
1065 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1066 {
1067 int unit = ccdunit(dev);
1068 int i, j, lookedup = 0, error = 0;
1069 int part, pmask;
1070 struct ccd_softc *cs;
1071 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1072 kauth_cred_t uc;
1073 char **cpp;
1074 struct pathbuf *pb;
1075 struct vnode **vpp;
1076 #ifdef __HAVE_OLD_DISKLABEL
1077 struct disklabel newlabel;
1078 #endif
1079
1080 if ((cs = ccdget(unit)) == NULL)
1081 return ENOENT;
1082 uc = kauth_cred_get();
1083
1084 /* Must be open for writes for these commands... */
1085 switch (cmd) {
1086 case CCDIOCSET:
1087 case CCDIOCCLR:
1088 case DIOCSDINFO:
1089 case DIOCWDINFO:
1090 #ifdef __HAVE_OLD_DISKLABEL
1091 case ODIOCSDINFO:
1092 case ODIOCWDINFO:
1093 #endif
1094 case DIOCKLABEL:
1095 case DIOCWLABEL:
1096 if ((flag & FWRITE) == 0)
1097 return (EBADF);
1098 }
1099
1100 mutex_enter(&cs->sc_dvlock);
1101
1102 /* Must be initialized for these... */
1103 switch (cmd) {
1104 case CCDIOCCLR:
1105 case DIOCGDINFO:
1106 case DIOCCACHESYNC:
1107 case DIOCSDINFO:
1108 case DIOCWDINFO:
1109 case DIOCGPART:
1110 case DIOCWLABEL:
1111 case DIOCKLABEL:
1112 case DIOCGDEFLABEL:
1113 #ifdef __HAVE_OLD_DISKLABEL
1114 case ODIOCGDINFO:
1115 case ODIOCSDINFO:
1116 case ODIOCWDINFO:
1117 case ODIOCGDEFLABEL:
1118 #endif
1119 if ((cs->sc_flags & CCDF_INITED) == 0) {
1120 error = ENXIO;
1121 goto out;
1122 }
1123 }
1124
1125 switch (cmd) {
1126 case CCDIOCSET:
1127 if (cs->sc_flags & CCDF_INITED) {
1128 error = EBUSY;
1129 goto out;
1130 }
1131
1132 /* Validate the flags. */
1133 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) {
1134 error = EINVAL;
1135 goto out;
1136 }
1137
1138 if (ccio->ccio_ndisks > CCD_MAXNDISKS ||
1139 ccio->ccio_ndisks == 0) {
1140 error = EINVAL;
1141 goto out;
1142 }
1143
1144 /* Fill in some important bits. */
1145 cs->sc_ileave = ccio->ccio_ileave;
1146 cs->sc_nccdisks = ccio->ccio_ndisks;
1147 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK;
1148
1149 /*
1150 * Allocate space for and copy in the array of
1151 * component pathnames and device numbers.
1152 */
1153 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP);
1154 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP);
1155 error = copyin(ccio->ccio_disks, cpp,
1156 ccio->ccio_ndisks * sizeof(*cpp));
1157 if (error) {
1158 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp));
1159 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp));
1160 goto out;
1161 }
1162
1163 #ifdef DEBUG
1164 if (ccddebug & CCDB_INIT)
1165 for (i = 0; i < ccio->ccio_ndisks; ++i)
1166 printf("ccdioctl: component %d: %p\n",
1167 i, cpp[i]);
1168 #endif
1169
1170 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1171 #ifdef DEBUG
1172 if (ccddebug & CCDB_INIT)
1173 printf("ccdioctl: lookedup = %d\n", lookedup);
1174 #endif
1175 error = pathbuf_copyin(cpp[i], &pb);
1176 if (error == 0) {
1177 error = dk_lookup(pb, l, &vpp[i]);
1178 }
1179 pathbuf_destroy(pb);
1180 if (error != 0) {
1181 for (j = 0; j < lookedup; ++j)
1182 (void)vn_close(vpp[j], FREAD|FWRITE,
1183 uc);
1184 kmem_free(vpp, ccio->ccio_ndisks *
1185 sizeof(*vpp));
1186 kmem_free(cpp, ccio->ccio_ndisks *
1187 sizeof(*cpp));
1188 goto out;
1189 }
1190 ++lookedup;
1191 }
1192
1193 /* Attach the disk. */
1194 disk_attach(&cs->sc_dkdev);
1195 bufq_alloc(&cs->sc_bufq, "fcfs", 0);
1196
1197 /*
1198 * Initialize the ccd. Fills in the softc for us.
1199 */
1200 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) {
1201 for (j = 0; j < lookedup; ++j)
1202 (void)vn_close(vpp[j], FREAD|FWRITE,
1203 uc);
1204 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp));
1205 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp));
1206 disk_detach(&cs->sc_dkdev);
1207 bufq_free(cs->sc_bufq);
1208 goto out;
1209 }
1210
1211 /* We can free the temporary variables now. */
1212 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp));
1213 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp));
1214
1215 /*
1216 * The ccd has been successfully initialized, so
1217 * we can place it into the array. Don't try to
1218 * read the disklabel until the disk has been attached,
1219 * because space for the disklabel is allocated
1220 * in disk_attach();
1221 */
1222 ccio->ccio_unit = unit;
1223 ccio->ccio_size = cs->sc_size;
1224
1225 /* Try and read the disklabel. */
1226 ccdgetdisklabel(dev);
1227 break;
1228
1229 case CCDIOCCLR:
1230 /*
1231 * Don't unconfigure if any other partitions are open
1232 * or if both the character and block flavors of this
1233 * partition are open.
1234 */
1235 part = DISKPART(dev);
1236 pmask = (1 << part);
1237 if ((cs->sc_dkdev.dk_openmask & ~pmask) ||
1238 ((cs->sc_dkdev.dk_bopenmask & pmask) &&
1239 (cs->sc_dkdev.dk_copenmask & pmask))) {
1240 error = EBUSY;
1241 goto out;
1242 }
1243
1244 /* Stop new I/O, wait for in-flight I/O to complete. */
1245 mutex_enter(cs->sc_iolock);
1246 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL);
1247 cs->sc_zap = true;
1248 while (disk_isbusy(&cs->sc_dkdev) ||
1249 bufq_peek(cs->sc_bufq) != NULL ||
1250 cs->sc_thread != NULL) {
1251 cv_broadcast(&cs->sc_push);
1252 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz);
1253 }
1254 mutex_exit(cs->sc_iolock);
1255
1256 /*
1257 * Free ccd_softc information and clear entry.
1258 */
1259
1260 /* Close the components and free their pathnames. */
1261 for (i = 0; i < cs->sc_nccdisks; ++i) {
1262 /*
1263 * XXX: this close could potentially fail and
1264 * cause Bad Things. Maybe we need to force
1265 * the close to happen?
1266 */
1267 #ifdef DEBUG
1268 if (ccddebug & CCDB_VNODE)
1269 vprint("CCDIOCCLR: vnode info",
1270 cs->sc_cinfo[i].ci_vp);
1271 #endif
1272 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1273 uc);
1274 kmem_free(cs->sc_cinfo[i].ci_path,
1275 cs->sc_cinfo[i].ci_pathlen);
1276 }
1277
1278 /* Free interleave index. */
1279 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) {
1280 kmem_free(cs->sc_itable[i].ii_index,
1281 cs->sc_itable[i].ii_indexsz);
1282 }
1283
1284 /* Free component info and interleave table. */
1285 kmem_free(cs->sc_cinfo, cs->sc_nccdisks *
1286 sizeof(struct ccdcinfo));
1287 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) *
1288 sizeof(struct ccdiinfo));
1289
1290 /* Detatch the disk. */
1291 disk_detach(&cs->sc_dkdev);
1292 bufq_free(cs->sc_bufq);
1293 ccdput(cs);
1294 /* Don't break, otherwise cs is read again. */
1295 return 0;
1296
1297 case DIOCGDINFO:
1298 *(struct disklabel *)data = *(cs->sc_dkdev.dk_label);
1299 break;
1300
1301 #ifdef __HAVE_OLD_DISKLABEL
1302 case ODIOCGDINFO:
1303 newlabel = *(cs->sc_dkdev.dk_label);
1304 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1305 return ENOTTY;
1306 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1307 break;
1308 #endif
1309
1310 case DIOCGPART:
1311 ((struct partinfo *)data)->disklab = cs->sc_dkdev.dk_label;
1312 ((struct partinfo *)data)->part =
1313 &cs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1314 break;
1315
1316 case DIOCCACHESYNC:
1317 /*
1318 * XXX Do we really need to care about having a writable
1319 * file descriptor here?
1320 */
1321 if ((flag & FWRITE) == 0)
1322 return (EBADF);
1323
1324 /*
1325 * We pass this call down to all components and report
1326 * the first error we encounter.
1327 */
1328 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) {
1329 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data,
1330 flag, uc);
1331 if (j != 0 && error == 0)
1332 error = j;
1333 }
1334 break;
1335
1336 case DIOCWDINFO:
1337 case DIOCSDINFO:
1338 #ifdef __HAVE_OLD_DISKLABEL
1339 case ODIOCWDINFO:
1340 case ODIOCSDINFO:
1341 #endif
1342 {
1343 struct disklabel *lp;
1344 #ifdef __HAVE_OLD_DISKLABEL
1345 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1346 memset(&newlabel, 0, sizeof newlabel);
1347 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1348 lp = &newlabel;
1349 } else
1350 #endif
1351 lp = (struct disklabel *)data;
1352
1353 cs->sc_flags |= CCDF_LABELLING;
1354
1355 error = setdisklabel(cs->sc_dkdev.dk_label,
1356 lp, 0, cs->sc_dkdev.dk_cpulabel);
1357 if (error == 0) {
1358 if (cmd == DIOCWDINFO
1359 #ifdef __HAVE_OLD_DISKLABEL
1360 || cmd == ODIOCWDINFO
1361 #endif
1362 )
1363 error = writedisklabel(CCDLABELDEV(dev),
1364 ccdstrategy, cs->sc_dkdev.dk_label,
1365 cs->sc_dkdev.dk_cpulabel);
1366 }
1367
1368 cs->sc_flags &= ~CCDF_LABELLING;
1369 break;
1370 }
1371
1372 case DIOCKLABEL:
1373 if (*(int *)data != 0)
1374 cs->sc_flags |= CCDF_KLABEL;
1375 else
1376 cs->sc_flags &= ~CCDF_KLABEL;
1377 break;
1378
1379 case DIOCWLABEL:
1380 if (*(int *)data != 0)
1381 cs->sc_flags |= CCDF_WLABEL;
1382 else
1383 cs->sc_flags &= ~CCDF_WLABEL;
1384 break;
1385
1386 case DIOCGDEFLABEL:
1387 ccdgetdefaultlabel(cs, (struct disklabel *)data);
1388 break;
1389
1390 #ifdef __HAVE_OLD_DISKLABEL
1391 case ODIOCGDEFLABEL:
1392 ccdgetdefaultlabel(cs, &newlabel);
1393 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1394 return ENOTTY;
1395 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1396 break;
1397 #endif
1398
1399 default:
1400 error = ENOTTY;
1401 }
1402
1403 out:
1404 mutex_exit(&cs->sc_dvlock);
1405 return (error);
1406 }
1407
1408 static int
1409 ccdsize(dev_t dev)
1410 {
1411 struct ccd_softc *cs;
1412 struct disklabel *lp;
1413 int part, unit, omask, size;
1414
1415 unit = ccdunit(dev);
1416 if ((cs = ccdget(unit)) == NULL)
1417 return -1;
1418
1419 if ((cs->sc_flags & CCDF_INITED) == 0)
1420 return (-1);
1421
1422 part = DISKPART(dev);
1423 omask = cs->sc_dkdev.dk_openmask & (1 << part);
1424 lp = cs->sc_dkdev.dk_label;
1425
1426 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp))
1427 return (-1);
1428
1429 if (lp->d_partitions[part].p_fstype != FS_SWAP)
1430 size = -1;
1431 else
1432 size = lp->d_partitions[part].p_size *
1433 (lp->d_secsize / DEV_BSIZE);
1434
1435 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp))
1436 return (-1);
1437
1438 return (size);
1439 }
1440
1441 static void
1442 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp)
1443 {
1444 struct ccdgeom *ccg = &cs->sc_geom;
1445
1446 memset(lp, 0, sizeof(*lp));
1447
1448 lp->d_secperunit = cs->sc_size;
1449 lp->d_secsize = ccg->ccg_secsize;
1450 lp->d_nsectors = ccg->ccg_nsectors;
1451 lp->d_ntracks = ccg->ccg_ntracks;
1452 lp->d_ncylinders = ccg->ccg_ncylinders;
1453 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1454
1455 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1456 lp->d_type = DTYPE_CCD;
1457 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1458 lp->d_rpm = 3600;
1459 lp->d_interleave = 1;
1460 lp->d_flags = 0;
1461
1462 lp->d_partitions[RAW_PART].p_offset = 0;
1463 lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1464 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1465 lp->d_npartitions = RAW_PART + 1;
1466
1467 lp->d_magic = DISKMAGIC;
1468 lp->d_magic2 = DISKMAGIC;
1469 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label);
1470 }
1471
1472 /*
1473 * Read the disklabel from the ccd. If one is not present, fake one
1474 * up.
1475 */
1476 static void
1477 ccdgetdisklabel(dev_t dev)
1478 {
1479 int unit = ccdunit(dev);
1480 struct ccd_softc *cs;
1481 const char *errstring;
1482 struct disklabel *lp;
1483 struct cpu_disklabel *clp;
1484
1485 if ((cs = ccdget(unit)) == NULL)
1486 return;
1487 lp = cs->sc_dkdev.dk_label;
1488 clp = cs->sc_dkdev.dk_cpulabel;
1489 KASSERT(mutex_owned(&cs->sc_dvlock));
1490
1491 memset(clp, 0, sizeof(*clp));
1492
1493 ccdgetdefaultlabel(cs, lp);
1494
1495 /*
1496 * Call the generic disklabel extraction routine.
1497 */
1498 cs->sc_flags |= CCDF_RLABEL;
1499 if ((cs->sc_flags & CCDF_NOLABEL) != 0)
1500 errstring = "CCDF_NOLABEL set; ignoring on-disk label";
1501 else
1502 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy,
1503 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel);
1504 if (errstring)
1505 ccdmakedisklabel(cs);
1506 else {
1507 int i;
1508 struct partition *pp;
1509
1510 /*
1511 * Sanity check whether the found disklabel is valid.
1512 *
1513 * This is necessary since total size of ccd may vary
1514 * when an interleave is changed even though exactly
1515 * same componets are used, and old disklabel may used
1516 * if that is found.
1517 */
1518 if (lp->d_secperunit != cs->sc_size)
1519 printf("WARNING: %s: "
1520 "total sector size in disklabel (%d) != "
1521 "the size of ccd (%lu)\n", cs->sc_xname,
1522 lp->d_secperunit, (u_long)cs->sc_size);
1523 for (i = 0; i < lp->d_npartitions; i++) {
1524 pp = &lp->d_partitions[i];
1525 if (pp->p_offset + pp->p_size > cs->sc_size)
1526 printf("WARNING: %s: end of partition `%c' "
1527 "exceeds the size of ccd (%lu)\n",
1528 cs->sc_xname, 'a' + i, (u_long)cs->sc_size);
1529 }
1530 }
1531
1532 #ifdef DEBUG
1533 /* It's actually extremely common to have unlabeled ccds. */
1534 if (ccddebug & CCDB_LABEL)
1535 if (errstring != NULL)
1536 printf("%s: %s\n", cs->sc_xname, errstring);
1537 #endif
1538
1539 /* In-core label now valid. */
1540 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL;
1541 }
1542
1543 /*
1544 * Take care of things one might want to take care of in the event
1545 * that a disklabel isn't present.
1546 */
1547 static void
1548 ccdmakedisklabel(struct ccd_softc *cs)
1549 {
1550 struct disklabel *lp = cs->sc_dkdev.dk_label;
1551
1552 /*
1553 * For historical reasons, if there's no disklabel present
1554 * the raw partition must be marked FS_BSDFFS.
1555 */
1556 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1557
1558 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1559
1560 lp->d_checksum = dkcksum(lp);
1561 }
1562
#ifdef DEBUG
/*
 * Debug helper: print every entry of an interleave table.  The walk
 * stops at the first entry whose ii_ndisk is zero.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int entry = 0;

	while (ii->ii_ndisk) {
		int k;

		printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64,
		    entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);

		/* Followed by the index list of this entry. */
		k = 0;
		while (k < ii->ii_ndisk) {
			printf(" %d", ii->ii_index[k]);
			k++;
		}
		printf("\n");

		entry++;
		ii++;
	}
}
#endif
1578
1579 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr");
1580
1581 static int
1582 ccd_modcmd(modcmd_t cmd, void *arg)
1583 {
1584 int error = 0;
1585 #ifdef _MODULE
1586 int bmajor = -1, cmajor = -1;
1587 #endif
1588
1589
1590 switch (cmd) {
1591 case MODULE_CMD_INIT:
1592 #ifdef _MODULE
1593 ccdattach(4);
1594
1595 return devsw_attach("ccd", &ccd_bdevsw, &bmajor,
1596 &ccd_cdevsw, &cmajor);
1597 #endif
1598 break;
1599
1600 case MODULE_CMD_FINI:
1601 #ifdef _MODULE
1602 return devsw_detach(&ccd_bdevsw, &ccd_cdevsw);
1603 #endif
1604 break;
1605
1606 case MODULE_CMD_STAT:
1607 return ENOTTY;
1608
1609 default:
1610 return ENOTTY;
1611 }
1612
1613 return error;
1614 }
1615
1616 static int
1617 ccd_units_sysctl(SYSCTLFN_ARGS)
1618 {
1619 struct sysctlnode node;
1620 struct ccd_softc *sc;
1621 int error, i, nccd, *units;
1622 size_t size;
1623
1624 nccd = 0;
1625 mutex_enter(&ccd_lock);
1626 LIST_FOREACH(sc, &ccds, sc_link)
1627 nccd++;
1628 mutex_exit(&ccd_lock);
1629
1630 if (nccd != 0) {
1631 size = nccd * sizeof(*units);
1632 units = kmem_zalloc(size, KM_SLEEP);
1633 if (units == NULL)
1634 return ENOMEM;
1635
1636 i = 0;
1637 mutex_enter(&ccd_lock);
1638 LIST_FOREACH(sc, &ccds, sc_link) {
1639 if (i >= nccd)
1640 break;
1641 units[i] = sc->sc_unit;
1642 }
1643 mutex_exit(&ccd_lock);
1644 } else {
1645 units = NULL;
1646 size = 0;
1647 }
1648
1649 node = *rnode;
1650 node.sysctl_data = units;
1651 node.sysctl_size = size;
1652
1653 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1654 if (units)
1655 kmem_free(units, size);
1656 return error;
1657 }
1658
1659 static int
1660 ccd_info_sysctl(SYSCTLFN_ARGS)
1661 {
1662 struct sysctlnode node;
1663 struct ccddiskinfo ccd;
1664 struct ccd_softc *sc;
1665 int unit;
1666
1667 if (newp == NULL || newlen != sizeof(int))
1668 return EINVAL;
1669
1670 unit = *(const int *)newp;
1671 newp = NULL;
1672 newlen = 0;
1673 ccd.ccd_ndisks = ~0;
1674 mutex_enter(&ccd_lock);
1675 LIST_FOREACH(sc, &ccds, sc_link) {
1676 if (sc->sc_unit == unit) {
1677 ccd.ccd_ileave = sc->sc_ileave;
1678 ccd.ccd_size = sc->sc_size;
1679 ccd.ccd_ndisks = sc->sc_nccdisks;
1680 ccd.ccd_flags = sc->sc_flags;
1681 break;
1682 }
1683 }
1684 mutex_exit(&ccd_lock);
1685
1686 if (ccd.ccd_ndisks == ~0)
1687 return ENOENT;
1688
1689 node = *rnode;
1690 node.sysctl_data = &ccd;
1691 node.sysctl_size = sizeof(ccd);
1692
1693 return sysctl_lookup(SYSCTLFN_CALL(&node));
1694 }
1695
1696 static int
1697 ccd_components_sysctl(SYSCTLFN_ARGS)
1698 {
1699 struct sysctlnode node;
1700 int error, unit;
1701 size_t size;
1702 char *names, *p, *ep;
1703 struct ccd_softc *sc;
1704
1705 if (newp == NULL || newlen != sizeof(int))
1706 return EINVAL;
1707
1708 size = 0;
1709 unit = *(const int *)newp;
1710 newp = NULL;
1711 newlen = 0;
1712 mutex_enter(&ccd_lock);
1713 LIST_FOREACH(sc, &ccds, sc_link)
1714 if (sc->sc_unit == unit) {
1715 for (size_t i = 0; i < sc->sc_nccdisks; i++)
1716 size += strlen(sc->sc_cinfo[i].ci_path) + 1;
1717 break;
1718 }
1719 mutex_exit(&ccd_lock);
1720
1721 if (size == 0)
1722 return ENOENT;
1723 names = kmem_zalloc(size, KM_SLEEP);
1724 if (names == NULL)
1725 return ENOMEM;
1726
1727 p = names;
1728 ep = names + size;
1729 mutex_enter(&ccd_lock);
1730 LIST_FOREACH(sc, &ccds, sc_link)
1731 if (sc->sc_unit == unit) {
1732 for (size_t i = 0; i < sc->sc_nccdisks; i++) {
1733 char *d = sc->sc_cinfo[i].ci_path;
1734 while (p < ep && (*p++ = *d++) != '\0')
1735 continue;
1736 }
1737 break;
1738 }
1739 mutex_exit(&ccd_lock);
1740
1741 node = *rnode;
1742 node.sysctl_data = names;
1743 node.sysctl_size = ep - names;
1744
1745 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1746 kmem_free(names, size);
1747 return error;
1748 }
1749
1750 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup")
1751 {
1752 const struct sysctlnode *node = NULL;
1753
1754 sysctl_createv(clog, 0, NULL, &node,
1755 CTLFLAG_PERMANENT,
1756 CTLTYPE_NODE, "ccd",
1757 SYSCTL_DESCR("ConCatenated Disk state"),
1758 NULL, 0, NULL, 0,
1759 CTL_KERN, CTL_CREATE, CTL_EOL);
1760
1761 if (node == NULL)
1762 return;
1763
1764 sysctl_createv(clog, 0, &node, NULL,
1765 CTLFLAG_PERMANENT | CTLFLAG_READONLY,
1766 CTLTYPE_STRUCT, "units",
1767 SYSCTL_DESCR("List of ccd unit numbers"),
1768 ccd_units_sysctl, 0, NULL, 0,
1769 CTL_CREATE, CTL_EOL);
1770 sysctl_createv(clog, 0, &node, NULL,
1771 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1772 CTLTYPE_STRUCT, "info",
1773 SYSCTL_DESCR("Information about a CCD unit"),
1774 ccd_info_sysctl, 0, NULL, 0,
1775 CTL_CREATE, CTL_EOL);
1776 sysctl_createv(clog, 0, &node, NULL,
1777 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1778 CTLTYPE_STRUCT, "components",
1779 SYSCTL_DESCR("Information about CCD components"),
1780 ccd_components_sysctl, 0, NULL, 0,
1781 CTL_CREATE, CTL_EOL);
1782 }
1783