Home | History | Annotate | Line # | Download | only in apple
      1 /* $NetBSD: apple_dart.c,v 1.5 2023/02/24 11:19:15 jmcneill Exp $ */
      2 /*	$OpenBSD: apldart.c,v 1.10 2022/02/27 17:36:52 kettenis Exp $	*/
      3 
      4 /*-
      5  * Copyright (c) 2021 Mark Kettenis <kettenis (at) openbsd.org>
      6  * Copyright (c) 2021 Jared McNeill <jmcneill (at) invisible.ca>
      7  *
      8  * Permission to use, copy, modify, and distribute this software for any
      9  * purpose with or without fee is hereby granted, provided that the above
     10  * copyright notice and this permission notice appear in all copies.
     11  *
     12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     19  */
     20 
     21 //#define APPLE_DART_DEBUG
     22 
     23 #include <sys/cdefs.h>
     24 __KERNEL_RCSID(0, "$NetBSD: apple_dart.c,v 1.5 2023/02/24 11:19:15 jmcneill Exp $");
     25 
     26 #include <sys/param.h>
     27 #include <sys/bus.h>
     28 #include <sys/device.h>
     29 #include <sys/intr.h>
     30 #include <sys/kernel.h>
     31 #include <sys/systm.h>
     32 #include <sys/kmem.h>
     33 #include <sys/vmem.h>
     34 
     35 #include <arm/cpufunc.h>
     36 
     37 #include <dev/fdt/fdtvar.h>
     38 /*
     39  * This driver largely ignores stream IDs and simply uses a single
     40  * translation table for all the devices that it serves.  This is good
     41  * enough for the PCIe host bridge that serves the on-board devices on
     42  * the current generation Apple Silicon Macs as these only have a
     43  * single PCIe device behind each DART.
     44  */
     45 
     46 /*
     47  * DART registers
        *
        * Register offsets are relative to the mapped register window and are
        * accessed 32 bits at a time through DART_READ()/DART_WRITE().  Names
        * indented by one extra space are bit fields of the register above them.
     48  */
     49 #define	DART_PARAMS2		0x0004
     50 #define	 DART_PARAMS2_BYPASS_SUPPORT	__BIT(0)	/* if set, attach puts all streams in bypass */
     51 #define	DART_TLB_OP		0x0020
     52 #define	 DART_TLB_OP_BUSY		__BIT(2)	/* polled until clear after a flush request */
     53 #define	 DART_TLB_OP_FLUSH		__BIT(20)	/* written to request a TLB flush */
     54 #define	DART_TLB_OP_SIDMASK	0x0034	/* written with sc_sid_mask before each flush */
     55 #define	DART_ERR_STATUS		0x0040	/* read, then written back, by the interrupt handler */
     56 #define	 DART_ERR_FLAG		__BIT(31)	/* handler only reports when this is set */
     57 #define	 DART_ERR_STREAM_MASK	__BITS(27, 24)	/* not referenced by the code in this file */
     58 #define	 DART_ERR_CODE_MASK	__BITS(11, 0)	/* extracted and decoded by the interrupt handler */
     59 #define	 DART_ERR_READ_FAULT	__BIT(4)
     60 #define	 DART_ERR_WRITE_FAULT	__BIT(3)
     61 #define	 DART_ERR_NOPTE		__BIT(2)
     62 #define	 DART_ERR_NOPMD		__BIT(1)
     63 #define	 DART_ERR_NOTTBR	__BIT(0)
     64 #define	DART_ERR_ADDRL		0x0050	/* low 32 bits of the reported address */
     65 #define	DART_ERR_ADDRH		0x0054	/* high 32 bits of the reported address */
     66 #define	DART_CONFIG		0x0060
     67 #define	 DART_CONFIG_LOCK		__BIT(15)	/* attach leaves DARTs with this bit set alone */
     68 #define	DART_TCR(sid)		(0x0100 + (sid) * 0x4)	/* one 4-byte register per stream ID */
     69 #define	 DART_TCR_TRANSLATE_ENABLE	__BIT(7)
     70 #define	 DART_TCR_BYPASS_DART		__BIT(8)
     71 #define	 DART_TCR_BYPASS_DAPF		__BIT(12)
     72 #define	DART_TTBR(sid, idx)	(0x0200 + (sid) * 0x10 + (idx) * 0x4)	/* 16 bytes per stream, 4 per index */
     73 #define	 DART_TTBR_VALID		__BIT(31)
     74 #define	 DART_TTBR_SHIFT		12	/* table address is written shifted right by this */
     75 
     76 #define	DART_NUM_STREAMS	16	/* number of TCRs written in the bypass path of attach */
     77 #define	DART_ALL_STREAMS	((1 << DART_NUM_STREAMS) - 1)	/* not referenced by the code in this file */
     78 
     79 #define	DART_APERTURE_START	0x00100000	/* base of the DVA range given to vmem_create() */
     80 #define	DART_APERTURE_SIZE	0x3fe00000	/* size of the DVA range given to vmem_create() */
     81 #define	DART_PAGE_SIZE		16384	/* 16 KiB; also the vmem quantum and table size */
     82 #define	DART_PAGE_MASK		(DART_PAGE_SIZE - 1)
     83 
     84 /*
     85  * Some hardware (e.g. bge(4)) will always use (aligned) 64-bit memory
     86  * access.  To make sure this doesn't fault, round the subpage limits
     87  * down and up accordingly.
     88  */
     89 #define	DART_OFFSET_MASK	7	/* 8-byte granularity for the subpage limits */
     90 
     91 #define	DART_L1_TABLE		0x3	/* OR'ed into each L1 entry that points at an L2 table */
     92 #define	DART_L2_INVAL		0x0	/* value stored in a TTE when a mapping is unloaded */
     93 #define	DART_L2_VALID		__BIT(0)	/* set in every TTE written by apple_dart_load_map() */
     94 #define	DART_L2_FULL_PAGE	__BIT(1)	/* not referenced by the code in this file */
     95 
     96 #define	DART_L2_START_MASK	__BITS(63, 52)	/* TTE field holding the subpage start */
     97 #define	DART_L2_END_MASK	__BITS(51, 40)	/* TTE field holding the subpage end */
     98 #define	DART_L2_SUBPAGE(addr)	__SHIFTOUT((addr), __BITS(13, 2))	/* bits 13:2 of addr */
     99 #define	DART_L2_START(addr)	__SHIFTIN(DART_L2_SUBPAGE(addr), DART_L2_START_MASK)
    100 #define	DART_L2_END(addr)	__SHIFTIN(DART_L2_SUBPAGE(addr), DART_L2_END_MASK)
    101 
    102 #define	DART_ROUND_PAGE(pa)	(((pa) + DART_PAGE_MASK) & ~DART_PAGE_MASK)	/* round up to 16 KiB */
    103 #define	DART_TRUNC_PAGE(pa)	((pa) & ~DART_PAGE_MASK)	/* round down to 16 KiB */
    104 #define	DART_ROUND_OFFSET(pa)	(((pa) + DART_OFFSET_MASK) & ~DART_OFFSET_MASK)	/* round up to 8 bytes */
    105 #define	DART_TRUNC_OFFSET(pa)	((pa) & ~DART_OFFSET_MASK)	/* round down to 8 bytes */
    106 
    107 static const struct device_compatible_entry compat_data[] = {
        	/*
        	 * Devicetree "compatible" strings accepted by apple_dart_match().
        	 * attach reads .value of the matching entry as the number of
        	 * stream IDs (sc_nsid) and asserts that it is 16.
        	 */
    108 	{ .compat = "apple,dart-m1",		.value = 16 },
    109 	{ .compat = "apple,t8103-dart",		.value = 16 },
    110 	DEVICE_COMPAT_EOL
    111 };
    112 
    113 static struct arm32_dma_range apple_dart_dma_ranges[] = {
        	/*
        	 * The single DMA range installed into sc_bus_dmat by attach
        	 * (_ranges / _nranges = 1): system and bus base both 0, length
        	 * UINTPTR_MAX, flagged _BUS_DMAMAP_COHERENT.
        	 */
    114 	[0] = {
    115 		.dr_sysbase = 0,
    116 		.dr_busbase = 0,
    117 		.dr_len = UINTPTR_MAX,
    118 		.dr_flags = _BUS_DMAMAP_COHERENT,
    119 	}
    120 };
    121 
    122 struct apple_dart_map_state {
        	/*
        	 * Per-segment bookkeeping for a loaded DMA map.  An array with
        	 * one entry per possible segment is hung off map->_dm_iommu by
        	 * apple_dart_dmamap_create().
        	 */
    123 	bus_addr_t ams_dva;	/* start of the DVA range obtained from sc_dvamap */
    124 	bus_size_t ams_len;	/* length of that range; 0 means nothing is allocated */
    125 };
    126 
    127 struct apple_dart_dma {
        	/*
        	 * One DMA-able memory allocation made by apple_dart_dma_alloc();
        	 * used for the L1 and L2 translation tables.
        	 */
    128 	bus_dmamap_t dma_map;	/* map the memory is loaded into */
    129 	bus_dma_segment_t dma_seg;	/* the single backing segment */
    130 	bus_size_t dma_size;	/* size requested at allocation time */
    131 	void *dma_kva;	/* kernel virtual address of the mapping */
    132 };
    133 
    134 #define	DART_DMA_MAP(_dma)	((_dma)->dma_map)	/* the bus_dmamap_t */
    135 #define	DART_DMA_LEN(_dma)	((_dma)->dma_size)	/* allocation size in bytes */
    136 #define	DART_DMA_DVA(_dma)	((_dma)->dma_map->dm_segs[0].ds_addr)	/* address of the first loaded segment */
    137 #define	DART_DMA_KVA(_dma)	((_dma)->dma_kva)	/* kernel virtual address */
    138 
    139 struct apple_dart_softc {
        	/* Per-instance driver state, filled in by apple_dart_attach(). */
    140 	device_t sc_dev;	/* our device */
    141 	int sc_phandle;	/* devicetree node; used to print the FDT path on errors */
    142 	bus_space_tag_t sc_bst;	/* register access tag */
    143 	bus_space_handle_t sc_bsh;	/* mapped register window */
    144 	bus_dma_tag_t sc_dmat;	/* parent DMA tag; all dmamap ops are forwarded to it */
    145 
    146 	uint64_t sc_sid_mask;	/* __MASK(sc_nsid); written to DART_TLB_OP_SIDMASK */
    147 	u_int sc_nsid;	/* number of stream IDs, from compat_data */
    148 
    149 	vmem_t *sc_dvamap;	/* arena handing out device virtual addresses */
    150 
    151 	struct apple_dart_dma *sc_l1;	/* L1 table memory */
    152 	struct apple_dart_dma **sc_l2;	/* array of sc_nl2 L2 table allocations */
    153 	u_int sc_nl2;	/* number of entries in sc_l2 */
    154 
    155 	struct arm32_bus_dma_tag sc_bus_dmat;	/* tag returned to client devices by apple_dart_iommu_map() */
    156 };
    157 
        /* 32-bit register accessors; reg is a byte offset into the window mapped at sc_bsh. */
    158 #define DART_READ(sc, reg) \
    159 	bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg))
    160 #define	DART_WRITE(sc, reg, val) \
    161 	bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val))
    162 
    163 static void
    164 apple_dart_flush_tlb(struct apple_dart_softc *sc)
    165 {
    166 	dsb(sy);
    167 	isb();
    168 
    169 	DART_WRITE(sc, DART_TLB_OP_SIDMASK, sc->sc_sid_mask);
    170 	DART_WRITE(sc, DART_TLB_OP, DART_TLB_OP_FLUSH);
    171 	while ((DART_READ(sc, DART_TLB_OP) & DART_TLB_OP_BUSY) != 0) {
    172 		__asm volatile ("yield" ::: "memory");
    173 	}
    174 }
    175 
    176 static struct apple_dart_dma *
    177 apple_dart_dma_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_size_t align)
    178 {
    179 	struct apple_dart_dma *dma;
    180 	int nsegs, error;
    181 
    182 	dma = kmem_zalloc(sizeof(*dma), KM_SLEEP);
    183 	dma->dma_size = size;
    184 
    185 	error = bus_dmamem_alloc(dmat, size, align, 0, &dma->dma_seg, 1,
    186 	    &nsegs, BUS_DMA_WAITOK);
    187 	if (error != 0) {
    188 		goto destroy;
    189 	}
    190 
    191 	error = bus_dmamem_map(dmat, &dma->dma_seg, nsegs, size,
    192 	    &dma->dma_kva, BUS_DMA_WAITOK | BUS_DMA_NOCACHE);
    193 	if (error != 0) {
    194 		goto free;
    195 	}
    196 
    197 	error = bus_dmamap_create(dmat, size, 1, size, 0,
    198 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
    199 	if (error != 0) {
    200 		goto dmafree;
    201 	}
    202 
    203 	error = bus_dmamap_load(dmat, dma->dma_map, dma->dma_kva, size,
    204 	    NULL, BUS_DMA_WAITOK);
    205 	if (error != 0) {
    206 		goto unmap;
    207 	}
    208 
    209 	memset(dma->dma_kva, 0, size);
    210 
    211 	return dma;
    212 
    213 destroy:
    214 	bus_dmamap_destroy(dmat, dma->dma_map);
    215 unmap:
    216 	bus_dmamem_unmap(dmat, dma->dma_kva, size);
    217 free:
    218 	bus_dmamem_free(dmat, &dma->dma_seg, 1);
    219 dmafree:
    220 	kmem_free(dma, sizeof(*dma));
    221 	return NULL;
    222 }
    223 
    224 static int
    225 apple_dart_intr(void *priv)
    226 {
    227 	struct apple_dart_softc * const sc = priv;
    228 	char fdt_path[128];
    229 	uint64_t addr;
    230 	uint32_t status;
    231 
    232 	status = DART_READ(sc, DART_ERR_STATUS);
    233 	addr  = __SHIFTIN(DART_READ(sc, DART_ERR_ADDRL), __BITS(31, 0));
    234 	addr |= __SHIFTIN(DART_READ(sc, DART_ERR_ADDRH), __BITS(63, 32));
    235 	DART_WRITE(sc, DART_ERR_STATUS, status);
    236 
    237 	if ((status & DART_ERR_FLAG) == 0)
    238 		return 1;
    239 
    240 #ifdef APPLE_DART_DEBUG
    241 	printf("%s: status %#"PRIx32"\n", __func__, status);
    242 	printf("%s: addrl  %#"PRIx32"\n", __func__, DART_READ(sc, DART_ERR_ADDRL));
    243 	printf("%s: addrh  %#"PRIx32"\n", __func__, DART_READ(sc, DART_ERR_ADDRH));
    244 #endif
    245 
    246 	const char *reason = NULL;
    247 	int32_t code = __SHIFTOUT(status, DART_ERR_CODE_MASK);
    248 	switch (code) {
    249 	case DART_ERR_NOTTBR:
    250 	    reason = "no ttbr for address";
    251 	    break;
    252 	case DART_ERR_NOPMD:
    253 	    reason = "no pmd for address";
    254 	    break;
    255 	case DART_ERR_NOPTE:
    256 	    reason = "no pte for address";
    257 	    break;
    258 	case DART_ERR_WRITE_FAULT:
    259 	    reason = "write fault";
    260 	    break;
    261 	case DART_ERR_READ_FAULT:
    262 	    reason = "read fault";
    263 	    break;
    264 	}
    265 	fdtbus_get_path(sc->sc_phandle, fdt_path, sizeof(fdt_path));
    266 
    267 	printf("%s (%s): error addr 0x%016lx status 0x%08x: %s\n",
    268 	    device_xname(sc->sc_dev), fdt_path, addr, status, reason);
    269 
    270 	return 1;
    271 }
    272 
    273 static volatile uint64_t *
    274 apple_dart_lookup_tte(struct apple_dart_softc *sc, bus_addr_t dva)
    275 {
    276 	int idx = dva / DART_PAGE_SIZE;
    277 	int l2_idx = idx / (DART_PAGE_SIZE / sizeof(uint64_t));
    278 	int tte_idx = idx % (DART_PAGE_SIZE / sizeof(uint64_t));
    279 	volatile uint64_t *l2 = DART_DMA_KVA(sc->sc_l2[l2_idx]);
    280 
    281 	return &l2[tte_idx];
    282 }
    283 
    284 static void
    285 apple_dart_unload_map(struct apple_dart_softc *sc, bus_dmamap_t map)
    286 {
    287 	struct apple_dart_map_state *ams = map->_dm_iommu;
    288 	volatile uint64_t *tte;
    289 	int seg;
    290 
    291 	/* For each segment */
    292 	for (seg = 0; seg < map->dm_nsegs; seg++) {
    293 		u_long len, dva;
    294 
    295 		if (ams[seg].ams_len == 0) {
    296 			continue;
    297 		}
    298 
    299 		dva = ams[seg].ams_dva;
    300 		len = ams[seg].ams_len;
    301 
    302 		while (len > 0) {
    303 			tte = apple_dart_lookup_tte(sc, dva);
    304 			*tte = DART_L2_INVAL;
    305 
    306 			dva += DART_PAGE_SIZE;
    307 			len -= DART_PAGE_SIZE;
    308 		}
    309 
    310 		vmem_xfree(sc->sc_dvamap, ams[seg].ams_dva, ams[seg].ams_len);
    311 
    312 		ams[seg].ams_dva = 0;
    313 		ams[seg].ams_len = 0;
    314 	}
    315 
    316 	apple_dart_flush_tlb(sc);
    317 }
    318 
    319 static int
    320 apple_dart_load_map(struct apple_dart_softc *sc, bus_dmamap_t map)
    321 {
    322 	struct apple_dart_map_state *ams = map->_dm_iommu;
    323 	volatile uint64_t *tte;
    324 	int seg, error;
    325 
    326 	/* For each segment */
    327 	for (seg = 0; seg < map->dm_nsegs; seg++) {
    328 		paddr_t pa = map->dm_segs[seg]._ds_paddr;
    329 		psize_t off = pa - DART_TRUNC_PAGE(pa);
    330 		u_long len, dva;
    331 
    332 		len = DART_ROUND_PAGE(map->dm_segs[seg].ds_len + off);
    333 
    334 #ifdef APPLE_DART_DEBUG
    335 		device_printf(sc->sc_dev, "load pa=%#lx off=%lu len=%lu ",
    336 		    pa, off, len);
    337 #endif
    338 
    339 		error = vmem_xalloc(sc->sc_dvamap, len, DART_PAGE_SIZE, 0,
    340 		    0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_BESTFIT|VM_NOSLEEP,
    341 		    &dva);
    342 		if (error != 0) {
    343 			apple_dart_unload_map(sc, map);
    344 #ifdef APPLE_DART_DEBUG
    345 			printf("error=%d\n", error);
    346 #endif
    347 			return error;
    348 		}
    349 
    350 #ifdef APPLE_DART_DEBUG
    351 		printf("dva=%#lx\n", dva);
    352 #endif
    353 
    354 		ams[seg].ams_dva = dva;
    355 		ams[seg].ams_len = len;
    356 
    357 		map->dm_segs[seg].ds_addr = dva + off;
    358 
    359 		pa = DART_TRUNC_PAGE(pa);
    360 		paddr_t start = DART_TRUNC_OFFSET(off);
    361 		paddr_t end = DART_PAGE_MASK;
    362 		while (len > 0) {
    363 			tte = apple_dart_lookup_tte(sc, dva);
    364 			if (len < DART_PAGE_SIZE)
    365 				end = DART_ROUND_OFFSET(len) - 1;
    366 
    367 			*tte = pa | DART_L2_VALID |
    368 			    DART_L2_START(start) | DART_L2_END(end);
    369 #ifdef APPLE_DART_DEBUG
    370 			printf("tte %p = %"PRIx64"\n", tte, *tte);
    371 #endif
    372 			pa += DART_PAGE_SIZE;
    373 			dva += DART_PAGE_SIZE;
    374 			len -= DART_PAGE_SIZE;
    375 			start = 0;
    376 		}
    377 	}
    378 
    379 	apple_dart_flush_tlb(sc);
    380 
    381 	return 0;
    382 }
    383 
    384 static int
    385 apple_dart_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    386     bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamap)
    387 {
    388 	struct apple_dart_softc *sc = t->_cookie;
    389 	struct apple_dart_map_state *ams;
    390 	bus_dmamap_t map;
    391 	int error;
    392 
    393 	error = sc->sc_dmat->_dmamap_create(sc->sc_dmat, size, nsegments,
    394 	    maxsegsz, boundary, flags, &map);
    395 	if (error != 0) {
    396 		return error;
    397 	}
    398 
    399 	ams = kmem_zalloc(map->_dm_segcnt * sizeof(*ams),
    400 	    (flags & BUS_DMA_NOWAIT) != 0 ? KM_NOSLEEP : KM_SLEEP);
    401 	if (ams == NULL) {
    402 		sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
    403 		return ENOMEM;
    404 	}
    405 
    406 	map->_dm_iommu = ams;
    407 	*dmamap = map;
    408 	return 0;
    409 }
    410 
    411 static void
    412 apple_dart_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
    413 {
    414 	struct apple_dart_softc *sc = t->_cookie;
    415 	struct apple_dart_map_state *ams = map->_dm_iommu;
    416 
    417 	kmem_free(ams, map->_dm_segcnt * sizeof(*ams));
    418 	sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
    419 }
    420 
    421 static int
    422 apple_dart_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    423     size_t buflen, struct proc *p, int flags)
    424 {
    425 	struct apple_dart_softc *sc = t->_cookie;
    426 	int error;
    427 
    428 	error = sc->sc_dmat->_dmamap_load(sc->sc_dmat, map,
    429 	    buf, buflen, p, flags);
    430 	if (error != 0) {
    431 		return error;
    432 	}
    433 
    434 	error = apple_dart_load_map(sc, map);
    435 	if (error != 0) {
    436 		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
    437 	}
    438 
    439 	return error;
    440 }
    441 
    442 static int
    443 apple_dart_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map,
    444     struct mbuf *m, int flags)
    445 {
    446 	struct apple_dart_softc *sc = t->_cookie;
    447 	int error;
    448 
    449 	error = sc->sc_dmat->_dmamap_load_mbuf(sc->sc_dmat, map,
    450 	    m, flags);
    451 	if (error != 0) {
    452 		return error;
    453 	}
    454 
    455 	error = apple_dart_load_map(sc, map);
    456 	if (error != 0) {
    457 		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
    458 	}
    459 
    460 	return error;
    461 }
    462 
    463 static int
    464 apple_dart_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map,
    465     struct uio *uio, int flags)
    466 {
    467 	struct apple_dart_softc *sc = t->_cookie;
    468 	int error;
    469 
    470 	error = sc->sc_dmat->_dmamap_load_uio(sc->sc_dmat, map,
    471 	    uio, flags);
    472 	if (error != 0) {
    473 		return error;
    474 	}
    475 
    476 	error = apple_dart_load_map(sc, map);
    477 	if (error != 0) {
    478 		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
    479 	}
    480 
    481 	return error;
    482 }
    483 
    484 static int
    485 apple_dart_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
    486     bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
    487 {
    488 	struct apple_dart_softc *sc = t->_cookie;
    489 	int error;
    490 
    491 	error = sc->sc_dmat->_dmamap_load_raw(sc->sc_dmat, map,
    492 	    segs, nsegs, size, flags);
    493 	if (error != 0) {
    494 		return error;
    495 	}
    496 
    497 	error = apple_dart_load_map(sc, map);
    498 	if (error != 0) {
    499 		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
    500 	}
    501 
    502 	return error;
    503 }
    504 
    505 static void
    506 apple_dart_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
    507 {
    508 	struct apple_dart_softc *sc = t->_cookie;
    509 
    510 	apple_dart_unload_map(sc, map);
    511 	sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
    512 }
    513 
    514 static bus_dma_tag_t
    515 apple_dart_iommu_map(device_t dev, const u_int *data, bus_dma_tag_t dmat)
    516 {
    517 	struct apple_dart_softc * const sc = device_private(dev);
    518 
    519 	return &sc->sc_bus_dmat;
    520 }
    521 
        /* IOMMU operations handed to fdtbus_register_iommu() at the end of attach. */
    522 const struct fdtbus_iommu_func apple_dart_iommu_funcs = {
    523 	.map = apple_dart_iommu_map,
    524 };
    525 
    526 static int
    527 apple_dart_match(device_t parent, cfdata_t cf, void *aux)
    528 {
    529 	struct fdt_attach_args * const faa = aux;
    530 
    531 	return of_compatible_match(faa->faa_phandle, compat_data);
    532 }
    533 
    534 static void
    535 apple_dart_attach(device_t parent, device_t self, void *aux)
    536 {
    537 	struct apple_dart_softc * const sc = device_private(self);
    538 	struct fdt_attach_args * const faa = aux;
    539 	const int phandle = faa->faa_phandle;
    540 	char intrstr[128];
    541 	volatile uint64_t *l1;
    542 	bus_addr_t addr;
    543 	bus_size_t size;
    544 	u_int sid, idx;
    545 	paddr_t pa;
    546 	void *ih;
    547 
    548 	if (fdtbus_get_reg(phandle, 0, &addr, &size) != 0) {
    549 		aprint_error(": couldn't get registers\n");
    550 		return;
    551 	}
    552 	if (!fdtbus_intr_str(phandle, 0, intrstr, sizeof(intrstr))) {
    553 		aprint_error(": couldn't decode interrupt\n");
    554 		return;
    555 	}
    556 
    557 	sc->sc_dev = self;
    558 	sc->sc_phandle = phandle;
    559 	sc->sc_dmat = faa->faa_dmat;
    560 	sc->sc_bst = faa->faa_bst;
    561 	if (bus_space_map(sc->sc_bst, addr, size, 0, &sc->sc_bsh) != 0) {
    562 		aprint_error(": couldn't map registers\n");
    563 		return;
    564 	}
    565 
    566 	/* Skip locked DARTs for now. */
    567 	uint32_t config = DART_READ(sc, DART_CONFIG);
    568 	if (config & DART_CONFIG_LOCK) {
    569 		aprint_naive("\n");
    570 		aprint_normal(": locked\n");
    571 		return;
    572 	}
    573 
    574 	/*
    575 	 * Use bypass mode if supported.  This avoids an issue with
    576 	 * the USB3 controllers which need mappings entered into two
    577 	 * IOMMUs, which is somewhat difficult to implement with our
    578 	 * current kernel interfaces.
    579 	 */
    580 	uint32_t params2 = DART_READ(sc, DART_PARAMS2);
    581 	if (params2 & DART_PARAMS2_BYPASS_SUPPORT) {
    582 		for (sid = 0; sid < DART_NUM_STREAMS; sid++) {
    583 			DART_WRITE(sc, DART_TCR(sid),
    584 			    DART_TCR_BYPASS_DART | DART_TCR_BYPASS_DAPF);
    585 		}
    586 		aprint_naive("\n");
    587 		aprint_normal(": bypass\n");
    588 		return;
    589 	}
    590 
    591 	sc->sc_nsid = of_compatible_lookup(phandle, compat_data)->value;
    592 	sc->sc_sid_mask = __MASK(sc->sc_nsid);
    593 
    594 	aprint_naive("\n");
    595 	aprint_normal(": Apple DART @ %#lx/%#lx, %u SIDs (mask 0x%lx)\n",
    596 	    addr, size, sc->sc_nsid, sc->sc_sid_mask);
    597 
    598 	KASSERT(sc->sc_nsid == 16);
    599 	KASSERT(sc->sc_sid_mask == 0xffff);
    600 
    601 	sc->sc_dvamap = vmem_create(device_xname(self),
    602 	    DART_APERTURE_START, DART_APERTURE_SIZE, DART_PAGE_SIZE,
    603 	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_HIGH);
    604 	if (sc->sc_dvamap == NULL) {
    605 		aprint_error_dev(self, "couldn't allocate DVA map\n");
    606 		return;
    607 	}
    608 
    609 	/* Disable translations */
    610 	for (sid = 0; sid < sc->sc_nsid; sid++) {
    611 		DART_WRITE(sc, DART_TCR(sid), 0);
    612 	}
    613 
    614 	/* Remove page tables */
    615 	for (sid = 0; sid < sc->sc_nsid; sid++) {
    616 		for (idx = 0; idx < 4; idx++) {
    617 			DART_WRITE(sc, DART_TTBR(sid, idx), 0);
    618 		}
    619 	}
    620 	apple_dart_flush_tlb(sc);
    621 
    622 	/*
    623 	 * Build translation tables. We pre-allocate the translation
    624 	 * tables for the entire aperture such that we don't have to worry
    625 	 * about growing them in an mpsafe manner later.
    626 	 *
    627 	 * Cover the entire address space [0, ..._START + ..._SIZE) even if vmem
    628 	 * only allocates from [..._START, ..._START + ...+SIZE)
    629 	 */
    630 
    631 	const u_int ntte = howmany(DART_APERTURE_START + DART_APERTURE_SIZE - 1,
    632 				   DART_PAGE_SIZE);
    633 	const u_int nl2 = howmany(ntte, DART_PAGE_SIZE / sizeof(uint64_t));
    634 	const u_int nl1 = howmany(nl2, DART_PAGE_SIZE / sizeof(uint64_t));
    635 
    636 	sc->sc_l1 = apple_dart_dma_alloc(sc->sc_dmat,
    637 	    nl1 * DART_PAGE_SIZE, DART_PAGE_SIZE);
    638 	if (sc->sc_l1 == NULL) {
    639 		aprint_error_dev(self, "couldn't allocate L1 tables\n");
    640 		return;
    641 	}
    642 	sc->sc_l2 = kmem_zalloc(nl2 * sizeof(*sc->sc_l2), KM_SLEEP);
    643 	sc->sc_nl2 = nl2;
    644 
    645 	l1 = DART_DMA_KVA(sc->sc_l1);
    646 	for (idx = 0; idx < nl2; idx++) {
    647 		sc->sc_l2[idx] = apple_dart_dma_alloc(sc->sc_dmat,
    648 		    DART_PAGE_SIZE, DART_PAGE_SIZE);
    649 		if (sc->sc_l2[idx] == NULL) {
    650 			aprint_error_dev(self,
    651 			    "couldn't allocate L2 tables\n");
    652 			return;
    653 		}
    654 
    655 		l1[idx] = DART_DMA_DVA(sc->sc_l2[idx]) | DART_L1_TABLE;
    656 #ifdef APPLE_DART_DEBUG
    657 		printf("l1[%d] (%p) = %"PRIx64"\n", idx, &l1[idx], l1[idx]);
    658 #endif
    659 	}
    660 
    661 	/* Install page tables */
    662 	for (sid = 0; sid < sc->sc_nsid; sid++) {
    663 		pa = DART_DMA_DVA(sc->sc_l1);
    664 		for (idx = 0; idx < nl1; idx++) {
    665 			KASSERTMSG(__SHIFTOUT(pa, __BITS(DART_TTBR_SHIFT - 1, 0)) == 0,
    666 			    "TTBR pa is not correctly aligned %" PRIxPADDR, pa);
    667 
    668 			DART_WRITE(sc, DART_TTBR(sid, idx),
    669 			    (pa >> DART_TTBR_SHIFT) | DART_TTBR_VALID);
    670 			pa += DART_PAGE_SIZE;
    671 #ifdef APPLE_DART_DEBUG
    672 			printf("writing %"PRIx64" to %"PRIx32"\n",
    673 			    (pa >> DART_TTBR_SHIFT) | DART_TTBR_VALID,
    674 			    DART_TTBR(sid, idx));
    675 #endif
    676 		}
    677 	}
    678 	apple_dart_flush_tlb(sc);
    679 
    680 	/* Enable translations */
    681 	for (sid = 0; sid < sc->sc_nsid; sid++) {
    682 		DART_WRITE(sc, DART_TCR(sid), DART_TCR_TRANSLATE_ENABLE);
    683 	}
    684 
    685 	ih = fdtbus_intr_establish_xname(phandle, 0, IPL_HIGH, FDT_INTR_MPSAFE,
    686 	    apple_dart_intr, sc, device_xname(self));
    687 	if (ih == NULL) {
    688 		aprint_error_dev(self, "couldn't establish interrupt on %s\n",
    689 		    intrstr);
    690 		return;
    691 	}
    692 	aprint_normal_dev(self, "interrupting on %s\n", intrstr);
    693 
    694 	/* Setup bus DMA tag */
    695 	sc->sc_bus_dmat = *sc->sc_dmat;
    696 	sc->sc_bus_dmat._ranges = apple_dart_dma_ranges;
    697 	sc->sc_bus_dmat._nranges = 1;
    698 	sc->sc_bus_dmat._cookie = sc;
    699 	sc->sc_bus_dmat._dmamap_create = apple_dart_dmamap_create;
    700 	sc->sc_bus_dmat._dmamap_destroy = apple_dart_dmamap_destroy;
    701 	sc->sc_bus_dmat._dmamap_load = apple_dart_dmamap_load;
    702 	sc->sc_bus_dmat._dmamap_load_mbuf = apple_dart_dmamap_load_mbuf;
    703 	sc->sc_bus_dmat._dmamap_load_uio = apple_dart_dmamap_load_uio;
    704 	sc->sc_bus_dmat._dmamap_load_raw = apple_dart_dmamap_load_raw;
    705 	sc->sc_bus_dmat._dmamap_unload = apple_dart_dmamap_unload;
    706 
    707 	fdtbus_register_iommu(self, phandle, &apple_dart_iommu_funcs);
    708 }
    709 
        /* Autoconf glue: softc size plus the match and attach routines; the last two hooks are NULL. */
    710 CFATTACH_DECL_NEW(apple_dart, sizeof(struct apple_dart_softc),
    711 	apple_dart_match, apple_dart_attach, NULL, NULL);
    712