Home | History | Annotate | Line # | Download | only in dev
iommu.c revision 1.83
      1 /*	$NetBSD: iommu.c,v 1.83 2008/06/04 12:41:41 ad Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1999, 2000 Matthew R. Green
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * Copyright (c) 2001, 2002 Eduardo Horvath
     31  * All rights reserved.
     32  *
     33  * Redistribution and use in source and binary forms, with or without
     34  * modification, are permitted provided that the following conditions
     35  * are met:
     36  * 1. Redistributions of source code must retain the above copyright
     37  *    notice, this list of conditions and the following disclaimer.
     38  * 2. Redistributions in binary form must reproduce the above copyright
     39  *    notice, this list of conditions and the following disclaimer in the
     40  *    documentation and/or other materials provided with the distribution.
     41  * 3. The name of the author may not be used to endorse or promote products
     42  *    derived from this software without specific prior written permission.
     43  *
     44  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     45  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     46  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     47  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     48  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     49  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     50  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     51  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     52  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     53  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     54  * SUCH DAMAGE.
     55  */
     56 
     57 /*
     58  * UltraSPARC IOMMU support; used by both the sbus and pci code.
     59  */
     60 
     61 #include <sys/cdefs.h>
     62 __KERNEL_RCSID(0, "$NetBSD: iommu.c,v 1.83 2008/06/04 12:41:41 ad Exp $");
     63 
     64 #include "opt_ddb.h"
     65 
     66 #include <sys/param.h>
     67 #include <sys/extent.h>
     68 #include <sys/malloc.h>
     69 #include <sys/systm.h>
     70 #include <sys/device.h>
     71 #include <sys/proc.h>
     72 
     73 #include <uvm/uvm_extern.h>
     74 
     75 #include <machine/bus.h>
     76 #include <sparc64/sparc64/cache.h>
     77 #include <sparc64/dev/iommureg.h>
     78 #include <sparc64/dev/iommuvar.h>
     79 
     80 #include <machine/autoconf.h>
     81 #include <machine/cpu.h>
     82 
#ifdef DEBUG
/* Bits of iommudebug selecting which classes of debug output are printed. */
#define IDB_BUSDMA	0x1
#define IDB_IOMMU	0x2
#define IDB_INFO	0x4
#define	IDB_SYNC	0x8
int iommudebug = 0x0;
/*
 * Print the parenthesised printf argument list `s' when any bit of the
 * mask `l' is set in iommudebug.  `l' is parenthesised so that a compound
 * mask such as (IDB_INFO|IDB_IOMMU) is tested as a whole instead of being
 * torn apart by operator precedence.
 */
#define DPRINTF(l, s)   do { if (iommudebug & (l)) printf s; } while (0)
#else
#define DPRINTF(l, s)
#endif

/*
 * Ask the streaming buffer described by `i' to flush the page at DVMA
 * address `v' by writing `v' to its strbuf_pgflush register.  Nothing is
 * written when `i' has no flush word (sb_flush is NULL).
 */
#define iommu_strbuf_flush(i, v) do {					\
	if ((i)->sb_flush)						\
		bus_space_write_8((i)->sb_is->is_bustag, (i)->sb_sb,	\
			STRBUFREG(strbuf_pgflush), (v));		\
	} while (0)
     99 
    100 static	int iommu_strbuf_flush_done(struct strbuf_ctl *);
    101 
    102 /*
    103  * initialise the UltraSPARC IOMMU (SBUS or PCI):
    104  *	- allocate and setup the iotsb.
    105  *	- enable the IOMMU
    106  *	- initialise the streaming buffers (if they exist)
    107  *	- create a private DVMA map.
    108  */
    109 void
    110 iommu_init(char *name, struct iommu_state *is, int tsbsize, uint32_t iovabase)
    111 {
    112 	psize_t size;
    113 	vaddr_t va;
    114 	paddr_t pa;
    115 	struct vm_page *pg;
    116 	struct pglist pglist;
    117 
    118 	/*
    119 	 * Setup the iommu.
    120 	 *
    121 	 * The sun4u iommu is part of the SBUS or PCI controller so we will
    122 	 * deal with it here..
    123 	 *
    124 	 * For sysio and psycho/psycho+ the IOMMU address space always ends at
    125 	 * 0xffffe000, but the starting address depends on the size of the
    126 	 * map.  The map size is 1024 * 2 ^ is->is_tsbsize entries, where each
    127 	 * entry is 8 bytes.  The start of the map can be calculated by
    128 	 * (0xffffe000 << (8 + is->is_tsbsize)).
    129 	 *
    130 	 * But sabre and hummingbird use a different scheme that seems to
    131 	 * be hard-wired, so we read the start and size from the PROM and
    132 	 * just use those values.
    133 	 */
    134 	is->is_cr = (tsbsize << 16) | IOMMUCR_EN;
    135 	is->is_tsbsize = tsbsize;
    136 	if (iovabase == -1) {
    137 		is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
    138 		is->is_dvmaend = IOTSB_VEND;
    139 	} else {
    140 		is->is_dvmabase = iovabase;
    141 		is->is_dvmaend = iovabase + IOTSB_VSIZE(tsbsize);
    142 	}
    143 
    144 	/*
    145 	 * Allocate memory for I/O pagetables.  They need to be physically
    146 	 * contiguous.
    147 	 */
    148 
    149 	size = PAGE_SIZE << is->is_tsbsize;
    150 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
    151 		(paddr_t)PAGE_SIZE, (paddr_t)0, &pglist, 1, 0) != 0)
    152 		panic("iommu_init: no memory");
    153 
    154 	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY);
    155 	if (va == 0)
    156 		panic("iommu_init: no memory");
    157 	is->is_tsb = (int64_t *)va;
    158 
    159 	is->is_ptsb = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist));
    160 
    161 	/* Map the pages */
    162 	TAILQ_FOREACH(pg, &pglist, pageq.queue) {
    163 		pa = VM_PAGE_TO_PHYS(pg);
    164 		pmap_kenter_pa(va, pa | PMAP_NVC, VM_PROT_READ | VM_PROT_WRITE);
    165 		va += PAGE_SIZE;
    166 	}
    167 	pmap_update(pmap_kernel());
    168 	memset(is->is_tsb, 0, size);
    169 
    170 #ifdef DEBUG
    171 	if (iommudebug & IDB_INFO)
    172 	{
    173 		/* Probe the iommu */
    174 
    175 		printf("iommu regs at: cr=%lx tsb=%lx flush=%lx\n",
    176 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
    177 				offsetof (struct iommureg, iommu_cr)),
    178 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
    179 				offsetof (struct iommureg, iommu_tsb)),
    180 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
    181 				offsetof (struct iommureg, iommu_flush)));
    182 		printf("iommu cr=%llx tsb=%llx\n",
    183 			(unsigned long long)bus_space_read_8(is->is_bustag,
    184 				is->is_iommu,
    185 				offsetof (struct iommureg, iommu_cr)),
    186 			(unsigned long long)bus_space_read_8(is->is_bustag,
    187 				is->is_iommu,
    188 				offsetof (struct iommureg, iommu_tsb)));
    189 		printf("TSB base %p phys %llx\n", (void *)is->is_tsb,
    190 			(unsigned long long)is->is_ptsb);
    191 		delay(1000000); /* 1 s */
    192 	}
    193 #endif
    194 
    195 	/*
    196 	 * now actually start up the IOMMU
    197 	 */
    198 	iommu_reset(is);
    199 
    200 	/*
    201 	 * Now all the hardware's working we need to allocate a dvma map.
    202 	 */
    203 	printf("DVMA map: %x to %x\n",
    204 		(unsigned int)is->is_dvmabase,
    205 		(unsigned int)is->is_dvmaend);
    206 	printf("IOTSB: %llx to %llx\n",
    207 		(unsigned long long)is->is_ptsb,
    208 		(unsigned long long)(is->is_ptsb + size));
    209 	is->is_dvmamap = extent_create(name,
    210 	    is->is_dvmabase, is->is_dvmaend - PAGE_SIZE,
    211 	    M_DEVBUF, 0, 0, EX_NOWAIT);
    212 }
    213 
    214 /*
    215  * Streaming buffers don't exist on the UltraSPARC IIi; we should have
    216  * detected that already and disabled them.  If not, we will notice that
    217  * they aren't there when the STRBUF_EN bit does not remain.
    218  */
    219 void
    220 iommu_reset(struct iommu_state *is)
    221 {
    222 	int i;
    223 	struct strbuf_ctl *sb;
    224 
    225 	/* Need to do 64-bit stores */
    226 	bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_tsb),
    227 		is->is_ptsb);
    228 
    229 	/* Enable IOMMU in diagnostic mode */
    230 	bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_cr),
    231 		is->is_cr|IOMMUCR_DE);
    232 
    233 	for (i = 0; i < 2; i++) {
    234 		if ((sb = is->is_sb[i])) {
    235 
    236 			/* Enable diagnostics mode? */
    237 			bus_space_write_8(is->is_bustag, is->is_sb[i]->sb_sb,
    238 				STRBUFREG(strbuf_ctl), STRBUF_EN);
    239 
    240 			/* No streaming buffers? Disable them */
    241 			if (bus_space_read_8(is->is_bustag,
    242 				is->is_sb[i]->sb_sb,
    243 				STRBUFREG(strbuf_ctl)) == 0) {
    244 				is->is_sb[i]->sb_flush = NULL;
    245 			} else {
    246 
    247 				/*
    248 				 * locate the pa of the flush buffer.
    249 				 */
    250 				(void)pmap_extract(pmap_kernel(),
    251 					(vaddr_t)is->is_sb[i]->sb_flush,
    252 					&is->is_sb[i]->sb_flushpa);
    253 			}
    254 		}
    255 	}
    256 }
    257 
    258 /*
    259  * Here are the iommu control routines.
    260  */
    261 void
    262 iommu_enter(struct strbuf_ctl *sb, vaddr_t va, int64_t pa, int flags)
    263 {
    264 	struct iommu_state *is = sb->sb_is;
    265 	int strbuf = (flags & BUS_DMA_STREAMING);
    266 	int64_t tte;
    267 
    268 #ifdef DIAGNOSTIC
    269 	if (va < is->is_dvmabase || va > is->is_dvmaend)
    270 		panic("iommu_enter: va %#lx not in DVMA space", va);
    271 #endif
    272 
    273 	/* Is the streamcache flush really needed? */
    274 	if (sb->sb_flush) {
    275 		iommu_strbuf_flush(sb, va);
    276 		iommu_strbuf_flush_done(sb);
    277 	} else
    278 		/* If we can't flush the strbuf don't enable it. */
    279 		strbuf = 0;
    280 
    281 	tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
    282 		!(flags & BUS_DMA_NOCACHE), (strbuf));
    283 #ifdef DEBUG
    284 	tte |= (flags & 0xff000LL)<<(4*8);
    285 #endif
    286 
    287 	DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n",
    288 		       (int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va));
    289 	is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] = tte;
    290 	bus_space_write_8(is->is_bustag, is->is_iommu,
    291 		IOMMUREG(iommu_flush), va);
    292 	DPRINTF(IDB_IOMMU, ("iommu_enter: va %lx pa %lx TSB[%lx]@%p=%lx\n",
    293 		va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
    294 		(void *)(u_long)&is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)],
    295 		(u_long)tte));
    296 }
    297 
    298 /*
    299  * Find the value of a DVMA address (debug routine).
    300  */
    301 paddr_t
    302 iommu_extract(struct iommu_state *is, vaddr_t dva)
    303 {
    304 	int64_t tte = 0;
    305 
    306 	if (dva >= is->is_dvmabase && dva < is->is_dvmaend)
    307 		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
    308 
    309 	if ((tte & IOTTE_V) == 0)
    310 		return ((paddr_t)-1L);
    311 	return (tte & IOTTE_PAMASK);
    312 }
    313 
    314 /*
    315  * iommu_remove: removes mappings created by iommu_enter
    316  *
    317  * Only demap from IOMMU if flag is set.
    318  *
    319  * XXX: this function needs better internal error checking.
    320  */
    321 void
    322 iommu_remove(struct iommu_state *is, vaddr_t va, size_t len)
    323 {
    324 
    325 #ifdef DIAGNOSTIC
    326 	if (va < is->is_dvmabase || va > is->is_dvmaend)
    327 		panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
    328 	if ((long)(va + len) < (long)va)
    329 		panic("iommu_remove: va 0x%lx + len 0x%lx wraps",
    330 		      (long) va, (long) len);
    331 	if (len & ~0xfffffff)
    332 		panic("iommu_remove: ridiculous len 0x%lx", (u_long)len);
    333 #endif
    334 
    335 	va = trunc_page(va);
    336 	DPRINTF(IDB_IOMMU, ("iommu_remove: va %lx TSB[%lx]@%p\n",
    337 		va, (u_long)IOTSBSLOT(va, is->is_tsbsize),
    338 		&is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)]));
    339 	while (len > 0) {
    340 		DPRINTF(IDB_IOMMU, ("iommu_remove: clearing TSB slot %d "
    341 			"for va %p size %lx\n",
    342 			(int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va,
    343 			(u_long)len));
    344 		if (len <= PAGE_SIZE)
    345 			len = 0;
    346 		else
    347 			len -= PAGE_SIZE;
    348 
    349 		/* XXX Zero-ing the entry would not require RMW */
    350 		is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] &= ~IOTTE_V;
    351 		bus_space_write_8(is->is_bustag, is->is_iommu,
    352 			IOMMUREG(iommu_flush), va);
    353 		va += PAGE_SIZE;
    354 	}
    355 }
    356 
    357 static int
    358 iommu_strbuf_flush_done(struct strbuf_ctl *sb)
    359 {
    360 	struct iommu_state *is = sb->sb_is;
    361 	struct timeval cur, flushtimeout;
    362 
    363 #define BUMPTIME(t, usec) { \
    364 	register volatile struct timeval *tp = (t); \
    365 	register long us; \
    366  \
    367 	tp->tv_usec = us = tp->tv_usec + (usec); \
    368 	if (us >= 1000000) { \
    369 		tp->tv_usec = us - 1000000; \
    370 		tp->tv_sec++; \
    371 	} \
    372 }
    373 
    374 	if (!sb->sb_flush)
    375 		return (0);
    376 
    377 	/*
    378 	 * Streaming buffer flushes:
    379 	 *
    380 	 *   1 Tell strbuf to flush by storing va to strbuf_pgflush.  If
    381 	 *     we're not on a cache line boundary (64-bits):
    382 	 *   2 Store 0 in flag
    383 	 *   3 Store pointer to flag in flushsync
    384 	 *   4 wait till flushsync becomes 0x1
    385 	 *
    386 	 * If it takes more than .5 sec, something
    387 	 * went wrong.
    388 	 */
    389 
    390 	*sb->sb_flush = 0;
    391 	bus_space_write_8(is->is_bustag, sb->sb_sb,
    392 		STRBUFREG(strbuf_flushsync), sb->sb_flushpa);
    393 
    394 	microtime(&flushtimeout);
    395 	cur = flushtimeout;
    396 	BUMPTIME(&flushtimeout, 500000); /* 1/2 sec */
    397 
    398 	DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flush = %lx "
    399 		"at va = %lx pa = %lx now=%lx:%lx until = %lx:%lx\n",
    400 		(long)*sb->sb_flush, (long)sb->sb_flush, (long)sb->sb_flushpa,
    401 		cur.tv_sec, cur.tv_usec,
    402 		flushtimeout.tv_sec, flushtimeout.tv_usec));
    403 
    404 	/* Bypass non-coherent D$ */
    405 	while ((!ldxa(sb->sb_flushpa, ASI_PHYS_CACHED)) &&
    406 		timercmp(&cur, &flushtimeout, <=))
    407 		microtime(&cur);
    408 
    409 #ifdef DIAGNOSTIC
    410 	if (!ldxa(sb->sb_flushpa, ASI_PHYS_CACHED)) {
    411 		printf("iommu_strbuf_flush_done: flush timeout %p, at %p\n",
    412 			(void *)(u_long)*sb->sb_flush,
    413 			(void *)(u_long)sb->sb_flushpa); /* panic? */
    414 #ifdef DDB
    415 		Debugger();
    416 #endif
    417 	}
    418 #endif
    419 	DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flushed\n"));
    420 	return (*sb->sb_flush);
    421 }
    422 
    423 /*
    424  * IOMMU DVMA operations, common to SBUS and PCI.
    425  */
    426 int
    427 iommu_dvmamap_load(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_dmamap_t map,
    428 	void *buf, bus_size_t buflen, struct proc *p, int flags)
    429 {
    430 	struct iommu_state *is = sb->sb_is;
    431 	int s;
    432 	int err;
    433 	bus_size_t sgsize;
    434 	paddr_t curaddr;
    435 	u_long dvmaddr, sgstart, sgend;
    436 	bus_size_t align, boundary, len;
    437 	vaddr_t vaddr = (vaddr_t)buf;
    438 	int seg;
    439 	struct pmap *pmap;
    440 
    441 	if (map->dm_nsegs) {
    442 		/* Already in use?? */
    443 #ifdef DIAGNOSTIC
    444 		printf("iommu_dvmamap_load: map still in use\n");
    445 #endif
    446 		bus_dmamap_unload(t, map);
    447 	}
    448 
    449 	/*
    450 	 * Make sure that on error condition we return "no valid mappings".
    451 	 */
    452 	map->dm_nsegs = 0;
    453 	if (buflen > map->_dm_size) {
    454 		DPRINTF(IDB_BUSDMA,
    455 		    ("iommu_dvmamap_load(): error %d > %d -- "
    456 		     "map size exceeded!\n", (int)buflen, (int)map->_dm_size));
    457 		return (EINVAL);
    458 	}
    459 
    460 	sgsize = round_page(buflen + ((int)vaddr & PGOFSET));
    461 
    462 	/*
    463 	 * A boundary presented to bus_dmamem_alloc() takes precedence
    464 	 * over boundary in the map.
    465 	 */
    466 	if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
    467 		boundary = map->_dm_boundary;
    468 	align = max(map->dm_segs[0]._ds_align, PAGE_SIZE);
    469 
    470 	/*
    471 	 * If our segment size is larger than the boundary we need to
    472 	 * split the transfer up int little pieces ourselves.
    473 	 */
    474 	s = splhigh();
    475 	err = extent_alloc(is->is_dvmamap, sgsize, align,
    476 	    (sgsize > boundary) ? 0 : boundary,
    477 	    EX_NOWAIT|EX_BOUNDZERO, &dvmaddr);
    478 	splx(s);
    479 
    480 #ifdef DEBUG
    481 	if (err || (dvmaddr == (u_long)-1)) {
    482 		printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
    483 		    (int)sgsize, flags);
    484 #ifdef DDB
    485 		Debugger();
    486 #endif
    487 	}
    488 #endif
    489 	if (err != 0)
    490 		return (err);
    491 
    492 	if (dvmaddr == (u_long)-1)
    493 		return (ENOMEM);
    494 
    495 	/* Set the active DVMA map */
    496 	map->_dm_dvmastart = dvmaddr;
    497 	map->_dm_dvmasize = sgsize;
    498 
    499 	/*
    500 	 * Now split the DVMA range into segments, not crossing
    501 	 * the boundary.
    502 	 */
    503 	seg = 0;
    504 	sgstart = dvmaddr + (vaddr & PGOFSET);
    505 	sgend = sgstart + buflen - 1;
    506 	map->dm_segs[seg].ds_addr = sgstart;
    507 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: boundary %lx boundary - 1 %lx "
    508 	    "~(boundary - 1) %lx\n", (long)boundary, (long)(boundary - 1),
    509 	    (long)~(boundary - 1)));
    510 	while ((sgstart & ~(boundary - 1)) != (sgend & ~(boundary - 1))) {
    511 		/* Oops.  We crossed a boundary.  Split the xfer. */
    512 		len = boundary - (sgstart & (boundary - 1));
    513 		map->dm_segs[seg].ds_len = len;
    514 		DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
    515 		    "seg %d start %lx size %lx\n", seg,
    516 		    (long)map->dm_segs[seg].ds_addr,
    517 		    (long)map->dm_segs[seg].ds_len));
    518 		if (++seg >= map->_dm_segcnt) {
    519 			/* Too many segments.  Fail the operation. */
    520 			DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
    521 			    "too many segments %d\n", seg));
    522 			s = splhigh();
    523 			/* How can this fail?  And if it does what can we do? */
    524 			err = extent_free(is->is_dvmamap,
    525 			    dvmaddr, sgsize, EX_NOWAIT);
    526 			map->_dm_dvmastart = 0;
    527 			map->_dm_dvmasize = 0;
    528 			splx(s);
    529 			return (EFBIG);
    530 		}
    531 		sgstart += len;
    532 		map->dm_segs[seg].ds_addr = sgstart;
    533 	}
    534 	map->dm_segs[seg].ds_len = sgend - sgstart + 1;
    535 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
    536 	    "seg %d start %lx size %lx\n", seg,
    537 	    (long)map->dm_segs[seg].ds_addr, (long)map->dm_segs[seg].ds_len));
    538 	map->dm_nsegs = seg + 1;
    539 	map->dm_mapsize = buflen;
    540 
    541 	if (p != NULL)
    542 		pmap = p->p_vmspace->vm_map.pmap;
    543 	else
    544 		pmap = pmap_kernel();
    545 
    546 	for (; buflen > 0; ) {
    547 
    548 		/*
    549 		 * Get the physical address for this page.
    550 		 */
    551 		if (pmap_extract(pmap, (vaddr_t)vaddr, &curaddr) == FALSE) {
    552 #ifdef DIAGNOSTIC
    553 			printf("iommu_dvmamap_load: pmap_extract failed %lx\n", vaddr);
    554 #endif
    555 			bus_dmamap_unload(t, map);
    556 			return (-1);
    557 		}
    558 
    559 		/*
    560 		 * Compute the segment size, and adjust counts.
    561 		 */
    562 		sgsize = PAGE_SIZE - ((u_long)vaddr & PGOFSET);
    563 		if (buflen < sgsize)
    564 			sgsize = buflen;
    565 
    566 		DPRINTF(IDB_BUSDMA,
    567 		    ("iommu_dvmamap_load: map %p loading va %p "
    568 		    "dva %lx at pa %lx\n",
    569 		    map, (void *)vaddr, (long)dvmaddr,
    570 		    (long)(curaddr & ~(PAGE_SIZE-1))));
    571 		iommu_enter(sb, trunc_page(dvmaddr), trunc_page(curaddr),
    572 		    flags|0x4000);
    573 
    574 		dvmaddr += PAGE_SIZE;
    575 		vaddr += sgsize;
    576 		buflen -= sgsize;
    577 	}
    578 #ifdef DIAGNOSTIC
    579 	for (seg = 0; seg < map->dm_nsegs; seg++) {
    580 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
    581 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
    582 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
    583 			    seg, (long)map->dm_segs[seg].ds_addr,
    584 			    is->is_dvmabase, is->is_dvmaend);
    585 #ifdef DDB
    586 			Debugger();
    587 #endif
    588 		}
    589 	}
    590 #endif
    591 	return (0);
    592 }
    593 
    594 
    595 void
    596 iommu_dvmamap_unload(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_dmamap_t map)
    597 {
    598 	struct iommu_state *is = sb->sb_is;
    599 	int error, s;
    600 	bus_size_t sgsize = map->_dm_dvmasize;
    601 
    602 	/* Flush the iommu */
    603 #ifdef DEBUG
    604 	if (!map->_dm_dvmastart) {
    605 		printf("iommu_dvmamap_unload: No dvmastart is zero\n");
    606 #ifdef DDB
    607 		Debugger();
    608 #endif
    609 	}
    610 #endif
    611 	iommu_remove(is, map->_dm_dvmastart, map->_dm_dvmasize);
    612 
    613 	/* Flush the caches */
    614 	bus_dmamap_unload(t->_parent, map);
    615 
    616 	/* Mark the mappings as invalid. */
    617 	map->dm_mapsize = 0;
    618 	map->dm_nsegs = 0;
    619 
    620 	s = splhigh();
    621 	error = extent_free(is->is_dvmamap, map->_dm_dvmastart,
    622 		map->_dm_dvmasize, EX_NOWAIT);
    623 	map->_dm_dvmastart = 0;
    624 	map->_dm_dvmasize = 0;
    625 	splx(s);
    626 	if (error != 0)
    627 		printf("warning: %qd of DVMA space lost\n", (long long)sgsize);
    628 
    629 	/* Clear the map */
    630 }
    631 
    632 
    633 int
    634 iommu_dvmamap_load_raw(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_dmamap_t map,
    635 	bus_dma_segment_t *segs, int nsegs, int flags, bus_size_t size)
    636 {
    637 	struct iommu_state *is = sb->sb_is;
    638 	struct vm_page *pg;
    639 	int i, j, s;
    640 	int left;
    641 	int err;
    642 	bus_size_t sgsize;
    643 	paddr_t pa;
    644 	bus_size_t boundary, align;
    645 	u_long dvmaddr, sgstart, sgend;
    646 	struct pglist *pglist;
    647 	int pagesz = PAGE_SIZE;
    648 	int npg = 0; /* DEBUG */
    649 
    650 	if (map->dm_nsegs) {
    651 		/* Already in use?? */
    652 #ifdef DIAGNOSTIC
    653 		printf("iommu_dvmamap_load_raw: map still in use\n");
    654 #endif
    655 		bus_dmamap_unload(t, map);
    656 	}
    657 
    658 	/*
    659 	 * A boundary presented to bus_dmamem_alloc() takes precedence
    660 	 * over boundary in the map.
    661 	 */
    662 	if ((boundary = segs[0]._ds_boundary) == 0)
    663 		boundary = map->_dm_boundary;
    664 
    665 	align = max(segs[0]._ds_align, pagesz);
    666 
    667 	/*
    668 	 * Make sure that on error condition we return "no valid mappings".
    669 	 */
    670 	map->dm_nsegs = 0;
    671 	/* Count up the total number of pages we need */
    672 	pa = segs[0].ds_addr;
    673 	sgsize = 0;
    674 	left = size;
    675 	for (i = 0; left && i < nsegs; i++) {
    676 		if (round_page(pa) != round_page(segs[i].ds_addr))
    677 			sgsize = round_page(sgsize);
    678 		sgsize += min(left, segs[i].ds_len);
    679 		left -= segs[i].ds_len;
    680 		pa = segs[i].ds_addr + segs[i].ds_len;
    681 	}
    682 	sgsize = round_page(sgsize) + PAGE_SIZE; /* XXX reserve extra dvma page */
    683 
    684 	s = splhigh();
    685 	/*
    686 	 * If our segment size is larger than the boundary we need to
    687 	 * split the transfer up into little pieces ourselves.
    688 	 */
    689 	err = extent_alloc(is->is_dvmamap, sgsize, align,
    690 		(sgsize > boundary) ? 0 : boundary,
    691 		((flags & BUS_DMA_NOWAIT) == 0 ? EX_WAITOK : EX_NOWAIT) |
    692 		EX_BOUNDZERO, &dvmaddr);
    693 	splx(s);
    694 
    695 	if (err != 0)
    696 		return (err);
    697 
    698 #ifdef DEBUG
    699 	if (dvmaddr == (u_long)-1)
    700 	{
    701 		printf("iommu_dvmamap_load_raw(): extent_alloc(%d, %x) failed!\n",
    702 		    (int)sgsize, flags);
    703 #ifdef DDB
    704 		Debugger();
    705 #endif
    706 	}
    707 #endif
    708 	if (dvmaddr == (u_long)-1)
    709 		return (ENOMEM);
    710 
    711 	/* Set the active DVMA map */
    712 	map->_dm_dvmastart = dvmaddr;
    713 	map->_dm_dvmasize = sgsize;
    714 
    715 	if ((pglist = segs[0]._ds_mlist) == NULL) {
    716 		u_long prev_va = 0UL;
    717 		paddr_t prev_pa = 0;
    718 		int end = 0, offset;
    719 
    720 		/*
    721 		 * This segs is made up of individual physical
    722 		 *  segments, probably by _bus_dmamap_load_uio() or
    723 		 * _bus_dmamap_load_mbuf().  Ignore the mlist and
    724 		 * load each one individually.
    725 		 */
    726 		map->dm_mapsize = size;
    727 
    728 		j = 0;
    729 		for (i = 0; i < nsegs ; i++) {
    730 
    731 			pa = segs[i].ds_addr;
    732 			offset = (pa & PGOFSET);
    733 			pa = trunc_page(pa);
    734 			dvmaddr = trunc_page(dvmaddr);
    735 			left = min(size, segs[i].ds_len);
    736 
    737 			DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: converting "
    738 				"physseg %d start %lx size %lx\n", i,
    739 				(long)segs[i].ds_addr, (long)segs[i].ds_len));
    740 
    741 			if ((pa == prev_pa) &&
    742 				((offset != 0) || (end != offset))) {
    743 				/* We can re-use this mapping */
    744 				dvmaddr = prev_va;
    745 			}
    746 
    747 			sgstart = dvmaddr + offset;
    748 			sgend = sgstart + left - 1;
    749 
    750 			/* Are the segments virtually adjacent? */
    751 			if ((j > 0) && (end == offset) &&
    752 				((offset == 0) || (pa == prev_pa))) {
    753 				/* Just append to the previous segment. */
    754 				map->dm_segs[--j].ds_len += left;
    755 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
    756 					"appending seg %d start %lx size %lx\n", j,
    757 					(long)map->dm_segs[j].ds_addr,
    758 					(long)map->dm_segs[j].ds_len));
    759 			} else {
    760 				if (j >= map->_dm_segcnt) {
    761 					iommu_dvmamap_unload(t, sb, map);
    762 					return (EFBIG);
    763 				}
    764 				map->dm_segs[j].ds_addr = sgstart;
    765 				map->dm_segs[j].ds_len = left;
    766 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
    767 					"seg %d start %lx size %lx\n", j,
    768 					(long)map->dm_segs[j].ds_addr,
    769 					(long)map->dm_segs[j].ds_len));
    770 			}
    771 			end = (offset + left) & PGOFSET;
    772 
    773 			/* Check for boundary issues */
    774 			while ((sgstart & ~(boundary - 1)) !=
    775 				(sgend & ~(boundary - 1))) {
    776 				/* Need a new segment. */
    777 				map->dm_segs[j].ds_len =
    778 					boundary - (sgstart & (boundary - 1));
    779 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
    780 					"seg %d start %lx size %lx\n", j,
    781 					(long)map->dm_segs[j].ds_addr,
    782 					(long)map->dm_segs[j].ds_len));
    783 				if (++j >= map->_dm_segcnt) {
    784 					iommu_dvmamap_unload(t, sb, map);
    785 					return (EFBIG);
    786 				}
    787 				sgstart = roundup(sgstart, boundary);
    788 				map->dm_segs[j].ds_addr = sgstart;
    789 				map->dm_segs[j].ds_len = sgend - sgstart + 1;
    790 			}
    791 
    792 			if (sgsize == 0)
    793 				panic("iommu_dmamap_load_raw: size botch");
    794 
    795 			/* Now map a series of pages. */
    796 			while (dvmaddr <= sgend) {
    797 				DPRINTF(IDB_BUSDMA,
    798 					("iommu_dvmamap_load_raw: map %p "
    799 						"loading va %lx at pa %lx\n",
    800 						map, (long)dvmaddr,
    801 						(long)(pa)));
    802 				/* Enter it if we haven't before. */
    803 				if (prev_va != dvmaddr)
    804 					iommu_enter(sb, prev_va = dvmaddr,
    805 						prev_pa = pa,
    806 						flags | (++npg << 12));
    807 				dvmaddr += pagesz;
    808 				pa += pagesz;
    809 			}
    810 
    811 			size -= left;
    812 			++j;
    813 		}
    814 
    815 		map->dm_nsegs = j;
    816 #ifdef DIAGNOSTIC
    817 		{ int seg;
    818 	for (seg = 0; seg < map->dm_nsegs; seg++) {
    819 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
    820 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
    821 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
    822 				seg, (long)map->dm_segs[seg].ds_addr,
    823 				is->is_dvmabase, is->is_dvmaend);
    824 #ifdef DDB
    825 			Debugger();
    826 #endif
    827 		}
    828 	}
    829 		}
    830 #endif
    831 		return (0);
    832 	}
    833 
    834 	/*
    835 	 * This was allocated with bus_dmamem_alloc.
    836 	 * The pages are on a `pglist'.
    837 	 */
    838 	map->dm_mapsize = size;
    839 	i = 0;
    840 	sgstart = dvmaddr;
    841 	sgend = sgstart + size - 1;
    842 	map->dm_segs[i].ds_addr = sgstart;
    843 	while ((sgstart & ~(boundary - 1)) != (sgend & ~(boundary - 1))) {
    844 		/* Oops.  We crossed a boundary.  Split the xfer. */
    845 		map->dm_segs[i].ds_len = boundary - (sgstart & (boundary - 1));
    846 		DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
    847 			"seg %d start %lx size %lx\n", i,
    848 			(long)map->dm_segs[i].ds_addr,
    849 			(long)map->dm_segs[i].ds_len));
    850 		if (++i >= map->_dm_segcnt) {
    851 			/* Too many segments.  Fail the operation. */
    852 			s = splhigh();
    853 			/* How can this fail?  And if it does what can we do? */
    854 			err = extent_free(is->is_dvmamap,
    855 				dvmaddr, sgsize, EX_NOWAIT);
    856 			map->_dm_dvmastart = 0;
    857 			map->_dm_dvmasize = 0;
    858 			splx(s);
    859 			return (EFBIG);
    860 		}
    861 		sgstart = roundup(sgstart, boundary);
    862 		map->dm_segs[i].ds_addr = sgstart;
    863 	}
    864 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
    865 			"seg %d start %lx size %lx\n", i,
    866 			(long)map->dm_segs[i].ds_addr, (long)map->dm_segs[i].ds_len));
    867 	map->dm_segs[i].ds_len = sgend - sgstart + 1;
    868 
    869 	TAILQ_FOREACH(pg, pglist, pageq.queue) {
    870 		if (sgsize == 0)
    871 			panic("iommu_dmamap_load_raw: size botch");
    872 		pa = VM_PAGE_TO_PHYS(pg);
    873 
    874 		DPRINTF(IDB_BUSDMA,
    875 		    ("iommu_dvmamap_load_raw: map %p loading va %lx at pa %lx\n",
    876 		    map, (long)dvmaddr, (long)(pa)));
    877 		iommu_enter(sb, dvmaddr, pa, flags|0x8000);
    878 
    879 		dvmaddr += pagesz;
    880 		sgsize -= pagesz;
    881 	}
    882 	map->dm_mapsize = size;
    883 	map->dm_nsegs = i+1;
    884 #ifdef DIAGNOSTIC
    885 	{ int seg;
    886 	for (seg = 0; seg < map->dm_nsegs; seg++) {
    887 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
    888 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
    889 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
    890 				seg, (long)map->dm_segs[seg].ds_addr,
    891 				is->is_dvmabase, is->is_dvmaend);
    892 #ifdef DDB
    893 			Debugger();
    894 #endif
    895 		}
    896 	}
    897 	}
    898 #endif
    899 	return (0);
    900 }
    901 
    902 
    903 /*
    904  * Flush an individual dma segment, returns non-zero if the streaming buffers
    905  * need flushing afterwards.
    906  */
    907 static int
    908 iommu_dvmamap_sync_range(struct strbuf_ctl *sb, vaddr_t va, bus_size_t len)
    909 {
    910 	vaddr_t vaend;
    911 	struct iommu_state *is = sb->sb_is;
    912 
    913 #ifdef DIAGNOSTIC
    914 	if (va < is->is_dvmabase || va > is->is_dvmaend)
    915 		panic("invalid va: %llx", (long long)va);
    916 #endif
    917 
    918 	if ((is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)] & IOTTE_STREAM) == 0) {
    919 		DPRINTF(IDB_BUSDMA,
    920 			("iommu_dvmamap_sync_range: attempting to flush "
    921 			 "non-streaming entry\n"));
    922 		return (0);
    923 	}
    924 
    925 	vaend = (va + len + PGOFSET) & ~PGOFSET;
    926 	va &= ~PGOFSET;
    927 
    928 #ifdef DIAGNOSTIC
    929 	if (va < is->is_dvmabase || vaend > is->is_dvmaend)
    930 		panic("invalid va range: %llx to %llx (%x to %x)",
    931 		    (long long)va, (long long)vaend,
    932 		    is->is_dvmabase,
    933 		    is->is_dvmaend);
    934 #endif
    935 
    936 	for ( ; va <= vaend; va += PAGE_SIZE) {
    937 		DPRINTF(IDB_BUSDMA,
    938 		    ("iommu_dvmamap_sync_range: flushing va %p\n",
    939 		    (void *)(u_long)va));
    940 		iommu_strbuf_flush(sb, va);
    941 	}
    942 
    943 	return (1);
    944 }
    945 
    946 void
    947 iommu_dvmamap_sync(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_dmamap_t map,
    948 	bus_addr_t offset, bus_size_t len, int ops)
    949 {
    950 	bus_size_t count;
    951 	int i, needsflush = 0;
    952 
    953 	if (!sb->sb_flush)
    954 		return;
    955 
    956 	for (i = 0; i < map->dm_nsegs; i++) {
    957 		if (offset < map->dm_segs[i].ds_len)
    958 			break;
    959 		offset -= map->dm_segs[i].ds_len;
    960 	}
    961 
    962 	if (i == map->dm_nsegs)
    963 		panic("iommu_dvmamap_sync: segment too short %llu",
    964 		    (unsigned long long)offset);
    965 
    966 	if (ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTWRITE)) {
    967 		/* Nothing to do */;
    968 	}
    969 
    970 	if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE)) {
    971 
    972 		for (; len > 0 && i < map->dm_nsegs; i++) {
    973 			count = MIN(map->dm_segs[i].ds_len - offset, len);
    974 			if (count > 0 &&
    975 			    iommu_dvmamap_sync_range(sb,
    976 				map->dm_segs[i].ds_addr + offset, count))
    977 				needsflush = 1;
    978 			offset = 0;
    979 			len -= count;
    980 		}
    981 #ifdef DIAGNOSTIC
    982 		if (i == map->dm_nsegs && len > 0)
    983 			panic("iommu_dvmamap_sync: leftover %llu",
    984 			    (unsigned long long)len);
    985 #endif
    986 
    987 		if (needsflush)
    988 			iommu_strbuf_flush_done(sb);
    989 	}
    990 }
    991 
    992 int
    993 iommu_dvmamem_alloc(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_size_t size,
    994 	bus_size_t alignment, bus_size_t boundary, bus_dma_segment_t *segs,
    995 	int nsegs, int *rsegs, int flags)
    996 {
    997 
    998 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_alloc: sz %llx align %llx bound %llx "
    999 	   "segp %p flags %d\n", (unsigned long long)size,
   1000 	   (unsigned long long)alignment, (unsigned long long)boundary,
   1001 	   segs, flags));
   1002 	return (bus_dmamem_alloc(t->_parent, size, alignment, boundary,
   1003 	    segs, nsegs, rsegs, flags|BUS_DMA_DVMA));
   1004 }
   1005 
   1006 void
   1007 iommu_dvmamem_free(bus_dma_tag_t t, struct strbuf_ctl *sb,
   1008 	bus_dma_segment_t *segs, int nsegs)
   1009 {
   1010 
   1011 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_free: segp %p nsegs %d\n",
   1012 	    segs, nsegs));
   1013 	bus_dmamem_free(t->_parent, segs, nsegs);
   1014 }
   1015 
   1016 /*
   1017  * Map the DVMA mappings into the kernel pmap.
   1018  * Check the flags to see whether we're streaming or coherent.
   1019  */
   1020 int
   1021 iommu_dvmamem_map(bus_dma_tag_t t, struct strbuf_ctl *sb,
   1022 	bus_dma_segment_t *segs, int nsegs, size_t size, void **kvap,
   1023 	int flags)
   1024 {
   1025 	struct vm_page *pg;
   1026 	vaddr_t va;
   1027 	bus_addr_t addr;
   1028 	struct pglist *pglist;
   1029 	int cbit;
   1030 	const uvm_flag_t kmflags =
   1031 	    (flags & BUS_DMA_NOWAIT) != 0 ? UVM_KMF_NOWAIT : 0;
   1032 
   1033 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: segp %p nsegs %d size %lx\n",
   1034 	    segs, nsegs, size));
   1035 
   1036 	/*
   1037 	 * Allocate some space in the kernel map, and then map these pages
   1038 	 * into this space.
   1039 	 */
   1040 	size = round_page(size);
   1041 	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY | kmflags);
   1042 	if (va == 0)
   1043 		return (ENOMEM);
   1044 
   1045 	*kvap = (void *)va;
   1046 
   1047 	/*
   1048 	 * digest flags:
   1049 	 */
   1050 	cbit = 0;
   1051 	if (flags & BUS_DMA_COHERENT)	/* Disable vcache */
   1052 		cbit |= PMAP_NVC;
   1053 	if (flags & BUS_DMA_NOCACHE)	/* sideffects */
   1054 		cbit |= PMAP_NC;
   1055 
   1056 	/*
   1057 	 * Now take this and map it into the CPU.
   1058 	 */
   1059 	pglist = segs[0]._ds_mlist;
   1060 	TAILQ_FOREACH(pg, pglist, pageq.queue) {
   1061 #ifdef DIAGNOSTIC
   1062 		if (size == 0)
   1063 			panic("iommu_dvmamem_map: size botch");
   1064 #endif
   1065 		addr = VM_PAGE_TO_PHYS(pg);
   1066 		DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: "
   1067 		    "mapping va %lx at %llx\n", va, (unsigned long long)addr | cbit));
   1068 		pmap_kenter_pa(va, addr | cbit, VM_PROT_READ | VM_PROT_WRITE);
   1069 		va += PAGE_SIZE;
   1070 		size -= PAGE_SIZE;
   1071 	}
   1072 	pmap_update(pmap_kernel());
   1073 	return (0);
   1074 }
   1075 
   1076 /*
   1077  * Unmap DVMA mappings from kernel
   1078  */
   1079 void
   1080 iommu_dvmamem_unmap(bus_dma_tag_t t, struct strbuf_ctl *sb, void *kva,
   1081 	size_t size)
   1082 {
   1083 
   1084 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_unmap: kvm %p size %lx\n",
   1085 	    kva, size));
   1086 
   1087 #ifdef DIAGNOSTIC
   1088 	if ((u_long)kva & PGOFSET)
   1089 		panic("iommu_dvmamem_unmap");
   1090 #endif
   1091 
   1092 	size = round_page(size);
   1093 	pmap_kremove((vaddr_t)kva, size);
   1094 	pmap_update(pmap_kernel());
   1095 	uvm_km_free(kernel_map, (vaddr_t)kva, size, UVM_KMF_VAONLY);
   1096 }
   1097