1 /** 2 * Copyright (c) 2010-2012 Broadcom. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions, and the following disclaimer, 9 * without modification. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The names of the above-listed copyright holders may not be used 14 * to endorse or promote products derived from this software without 15 * specific prior written permission. 16 * 17 * ALTERNATIVELY, this software may be distributed under the terms of the 18 * GNU General Public License ("GPL") version 2, as published by the Free 19 * Software Foundation. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 25 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

/*
 * BCM2835 (Raspberry Pi) platform back-end for the VCHIQ interface:
 * allocates the shared slot/fragment memory, hands its bus address to
 * the VideoCore via the mailbox, and maps bulk-transfer buffers into
 * VideoCore-visible pagelists using NetBSD bus_dma(9).
 */

#include <interface/compat/vchi_bsd.h>

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/kmem.h>

#include <linux/completion.h>

#include <uvm/uvm_extern.h>

#include <arm/cpufunc.h>

#include <arch/arm/broadcom/bcm2835_mbox.h>
#include <arch/arm/broadcom/bcm2835var.h>

#define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)

#define VCHIQ_DOORBELL_IRQ IRQ_ARM_DOORBELL_0

/* True if va lies in the user portion of the address space. */
#define IS_USER_ADDRESS(va) \
	((vaddr_t)(va) >= VM_MIN_ADDRESS && (vaddr_t)(va) < VM_MAX_ADDRESS)

#include "vchiq_arm.h"
#include "vchiq_2835.h"
#include "vchiq_netbsd.h"
#include "vchiq_connected.h"

/* VideoCore pagelist entries use 4 KiB pages regardless of host PAGE_SIZE. */
#define VCPAGE_OFFSET 0x0fff
#define VCPAGE_SHIFT 12

#define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2)

typedef struct vchiq_2835_state_struct {
	int inited;			/* non-zero once arm_state is valid */
	VCHIQ_ARM_STATE_T arm_state;
} VCHIQ_2835_ARM_STATE_T;

/* BSD DMA */
static bus_dma_tag_t dma_tag;		/* tag supplied by the bus attachment */
static bus_dmamap_t dma_map;		/* map covering slots + fragments */

static unsigned int g_cache_line_size = CACHE_LINE_SIZE;
static unsigned int g_fragments_size;	/* bytes per fragment (2 cache lines) */
static char *g_fragments_base;		/* start of the fragment pool */
static char *g_free_fragments;		/* head of the free-fragment list */

/* Counts free fragments; sleep here when the pool is exhausted. */
struct semaphore g_free_fragments_sema;
/* Protects the g_free_fragments linked list. */
static struct semaphore g_free_fragments_mutex;

/*
 * Record the DMA tag from the bus attachment for later allocations.
 */
void
vchiq_platform_attach(bus_dma_tag_t tag)
{
	dma_tag = tag;
}

/*
 * One-time platform initialisation: allocate the shared slot and
 * fragment memory in one DMA-able region, initialise the slots and the
 * fragment free list, then post the region's bus address to the
 * VideoCore mailbox.  Returns 0 or a negative errno.
 */
int __init
vchiq_platform_init(VCHIQ_STATE_T *state)
{
	VCHIQ_SLOT_ZERO_T *vchiq_slot_zero;
	bus_dma_segment_t dma_segs[1];
	int dma_nsegs;
	void *slot_mem;
	bus_addr_t slot_phys;
	int slot_mem_size, frag_mem_size;
	int err;
	int i;

	_sema_init(&g_free_fragments_mutex, 1);

	/* NOTE(review): hard-codes 32 over CACHE_LINE_SIZE — presumably the
	 * VideoCore-side cache line; confirm against the firmware interface. */
	g_cache_line_size = 32;

	/* Each fragment holds a head and a tail partial cache line. */
	g_fragments_size = 2 * g_cache_line_size;

	/* Allocate space for the channels in coherent memory */
	slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
	frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);

	dma_nsegs = __arraycount(dma_segs);
	err = bus_dmamem_alloc(dma_tag,
	    slot_mem_size + frag_mem_size, PAGE_SIZE, 0,
	    dma_segs, dma_nsegs, &dma_nsegs, BUS_DMA_WAITOK);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "Unable to allocate channel memory");
		err = -ENOMEM;
		goto failed_alloc;
	}

	err = bus_dmamem_map(dma_tag,
	    dma_segs, dma_nsegs, slot_mem_size + frag_mem_size,
	    (void **)&slot_mem, BUS_DMA_COHERENT | BUS_DMA_WAITOK);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "Unable to map channel memory");
		err = -ENOMEM;
		goto failed_alloc;
	}

	err = bus_dmamap_create(dma_tag,
	    slot_mem_size + frag_mem_size, 1, /* maxsize, nsegments */
	    slot_mem_size + frag_mem_size, 0, /* maxsegsize, boundary */
	    BUS_DMA_WAITOK,
	    &dma_map);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "Unable to create DMA map");
		err = -ENOMEM;
		goto failed_alloc;
	}

	err = bus_dmamap_load(dma_tag, dma_map, slot_mem,
	    slot_mem_size + frag_mem_size, NULL, BUS_DMA_WAITOK);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "cannot load DMA map (%d)", err);
		err = -ENOMEM;
		goto failed_load;
	}
	/* Single segment guaranteed by nsegments=1 above. */
	slot_phys = dma_map->dm_segs[0].ds_addr;

	vchiq_log_info(vchiq_arm_log_level,
	    "%s: slot_phys = %lx\n", __func__, slot_phys);

	WARN_ON(((uintptr_t)slot_mem & (PAGE_SIZE - 1)) != 0);

	vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size);
	if (!vchiq_slot_zero) {
		err = -EINVAL;
		goto failed_init_slots;
	}

	/* Tell the VideoCore where the fragment pool lives (little-endian). */
	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
	    htole32((uint32_t)slot_phys + slot_mem_size);
	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
	    htole32(MAX_FRAGMENTS);

	g_fragments_base = (char *)slot_mem + slot_mem_size;
	/* From here on, slot_mem_size covers slots + fragments (sync range). */
	slot_mem_size += frag_mem_size;

	/* Thread the fragments into a singly linked free list: the first
	 * pointer-sized bytes of each free fragment point at the next. */
	g_free_fragments = g_fragments_base;
	for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
		*(char **)&g_fragments_base[i*g_fragments_size] =
		    &g_fragments_base[(i + 1)*g_fragments_size];
	}
	*(char **)&g_fragments_base[i * g_fragments_size] = NULL;

	_sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);

	if (vchiq_init_state(state, vchiq_slot_zero, 0/*slave*/) !=
	    VCHIQ_SUCCESS) {
		err = -EINVAL;
		goto failed_vchiq_init;
	}

	/* Send the base address of the slots to VideoCore */
	dsb(sy); /* Ensure all writes have completed */

	bus_dmamap_sync(dma_tag, dma_map, 0, slot_mem_size,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	bcm_mbox_write(BCM2835_MBOX_CHAN_VCHIQ, (uint32_t)slot_phys);
	bus_dmamap_sync(dma_tag, dma_map, 0, slot_mem_size,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	vchiq_log_info(vchiq_arm_log_level,
	    "vchiq_init - done (slots %p, phys %x)",
	    vchiq_slot_zero, (unsigned int)slot_phys);

	vchiq_call_connected_callbacks();

	return 0;

	/*
	 * NOTE(review): the unwind looks incomplete — on the failed_alloc
	 * path dma_map may never have been created (bus_dmamem_alloc/map
	 * failures jump here before bus_dmamap_create), yet it is passed to
	 * bus_dmamap_destroy; and the dmamem allocation/mapping is never
	 * released (no bus_dmamem_unmap/bus_dmamem_free).  Verify against
	 * bus_dma(9) before relying on these error paths.
	 */
failed_vchiq_init:
failed_init_slots:
failed_load:
	bus_dmamap_unload(dma_tag, dma_map);
failed_alloc:
	bus_dmamap_destroy(dma_tag, dma_map);

	return err;
}

/*
 * Tear down the DMA map created by vchiq_platform_init.
 * NOTE(review): the dmamem mapping/allocation is not released here
 * either — mirror of the unwind issue above.
 */
void __exit
vchiq_platform_exit(VCHIQ_STATE_T *state)
{

	bus_dmamap_unload(dma_tag, dma_map);
	bus_dmamap_destroy(dma_tag, dma_map);
}


/*
 * Allocate and initialise the per-state 2835 platform state, embedding
 * the generic ARM state.
 * NOTE(review): the kzalloc result is dereferenced without a NULL
 * check; with GFP_KERNEL this presumably sleeps rather than fails, but
 * confirm the compat layer's semantics.
 */
VCHIQ_STATUS_T
vchiq_platform_init_state(VCHIQ_STATE_T *state)
{
	VCHIQ_STATUS_T status = VCHIQ_SUCCESS;
	state->platform_state = kzalloc(sizeof(VCHIQ_2835_ARM_STATE_T), GFP_KERNEL);
	((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 1;
	status = vchiq_arm_init_state(state, &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state);
	if(status != VCHIQ_SUCCESS)
	{
		((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 0;
	}
	return status;
}

/*
 * Return the embedded ARM state; panics if the platform state was
 * never successfully initialised.
 */
VCHIQ_ARM_STATE_T*
vchiq_platform_get_arm_state(VCHIQ_STATE_T *state)
{
	if(!((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited)
	{
		BUG();
	}
	return &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state;
}

/*
 * Copy size bytes from src to dst, choosing copyin() for user-space
 * source addresses and kcopy() for kernel ones.
 */
VCHIQ_STATUS_T
vchiq_copy_from_user(void *dst, const void *src, int size)
{
	vaddr_t va = (vaddr_t)src;

	if (IS_USER_ADDRESS(va)) {
		int error = copyin(src, dst, size);
		return error ? VCHIQ_ERROR : VCHIQ_SUCCESS;
	} else {
		kcopy(src, dst, size);
		return VCHIQ_SUCCESS;
	}
}

/*
 * Per-bulk-transfer bookkeeping, stashed in bulk->remote_data between
 * vchiq_prepare_bulk_data() and vchiq_complete_bulk().
 */
typedef struct bulkinfo_struct {
	void *pagelist;			/* CPU mapping of the PAGELIST_T */
	bus_dma_segment_t pagelist_sgs[1];
	bus_size_t pagelist_size;
	bus_dmamap_t pagelist_map;	/* DMA map for the pagelist itself */
	bus_dmamap_t dmamap;		/* DMA map for the data buffer */
	struct proc *proc;		/* owner, for vsunlock/copyout later */
	void *buf;			/* caller's buffer */
	int size;			/* caller's buffer length */
} BULKINFO_T;

/* There is a potential problem with partial cache lines (pages?)
** at the ends of the block when reading. If the CPU accessed anything in
** the same line (page?) then it may have pulled old data into the cache,
** obscuring the new data underneath. We can solve this by transferring the
** partial cache lines separately, and allowing the ARM to copy into the
** cached area.
*/
/*
 * Build a VideoCore pagelist describing buf and attach it to bulk:
 * wires user pages, loads a DMA map for the buffer, encodes each DMA
 * segment as a 4 KiB-page entry, and for reads with unaligned ends
 * claims a fragment so the head/tail partial cache lines are
 * transferred separately.  Returns VCHIQ_SUCCESS or VCHIQ_ERROR.
 */
VCHIQ_STATUS_T
vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
    void *buf, int size, int dir)
{
	PAGELIST_T *pagelist;
	BULKINFO_T *bi;
	int nsegs;
	int ret;

	vchiq_log_info(vchiq_arm_log_level,
	    "%s: buf %p size %08x dir %s", __func__, buf, size,
	    dir == VCHIQ_BULK_RECEIVE ? "read" : "write");

	vaddr_t va = (vaddr_t)buf;
	/* Worst-case number of 4K pages (hence pagelist entries). */
	const size_t maxsegs = atop(round_page(va + size) - trunc_page(va));
	const int uvmflags = (dir == VCHIQ_BULK_RECEIVE ?
	    VM_PROT_READ : VM_PROT_WRITE);
	const int dmaflags = (dir == VCHIQ_BULK_RECEIVE ?
	    BUS_DMA_READ : BUS_DMA_WRITE);

	WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID);

	bi = kmem_alloc(sizeof(*bi), KM_SLEEP);
	bi->buf = buf;
	bi->size = size;
	/* Header plus one 32-bit address word per page. */
	bi->pagelist_size = sizeof(PAGELIST_T) +
	    (maxsegs * sizeof(uint32_t));
	bi->proc = curproc;

	ret = bus_dmamem_alloc(dma_tag, bi->pagelist_size,
	    0 /*CACHE_LINE_SIZE*/, 0, bi->pagelist_sgs,
	    __arraycount(bi->pagelist_sgs), &nsegs, BUS_DMA_WAITOK);

	if (ret != 0)
		goto fail1;

	ret = bus_dmamem_map(dma_tag, bi->pagelist_sgs, nsegs,
	    bi->pagelist_size, &bi->pagelist, BUS_DMA_COHERENT | BUS_DMA_WAITOK);
	if (ret != 0)
		goto fail2;

	pagelist = bi->pagelist;

	ret = bus_dmamap_create(dma_tag, bi->pagelist_size,
	    nsegs, bi->pagelist_size, 0, BUS_DMA_WAITOK, &bi->pagelist_map);
	if (ret != 0)
		goto fail3;

	ret = bus_dmamap_load(dma_tag, bi->pagelist_map, pagelist,
	    bi->pagelist_size, NULL, BUS_DMA_WAITOK | BUS_DMA_WRITE);
	if (ret != 0)
		goto fail4;

	/*
	 * Need to wire the buffer pages in.
	 */
	if (IS_USER_ADDRESS(buf)) {
		ret = uvm_vslock(bi->proc->p_vmspace, buf, size, uvmflags);
		if (ret != 0) {
			printf("%s: uvm_vslock failed (%d)\n", __func__, ret);
			goto fail5;
		}
	}

	ret = bus_dmamap_create(dma_tag, size, maxsegs, size, 0,
	    BUS_DMA_WAITOK, &bi->dmamap);

	if (ret != 0)
		goto fail6;

	/* Load with curproc so user addresses resolve in its vmspace. */
	ret = bus_dmamap_load(dma_tag, bi->dmamap, buf, size,
	    curproc, BUS_DMA_WAITOK | dmaflags);

	if (ret != 0)
		goto fail7;

	bulk->handle = memhandle;
	/*
	 * We've now got the bus_addr_t for the pagelist we want the transfer
	 * to use.
	 */
	bulk->data = (void *)bi->pagelist_map->dm_segs[0].ds_addr;

	pagelist->type = htole16((dir == VCHIQ_BULK_RECEIVE) ?
	    PAGELIST_READ : PAGELIST_WRITE);
	pagelist->length = htole32(size);
	pagelist->offset = htole16(va & VCPAGE_OFFSET);

	/*
	 * busdma already coalesces contiguous pages for us
	 */
	for (int i = 0; i < bi->dmamap->dm_nsegs; i++) {
		bus_addr_t addr = bi->dmamap->dm_segs[i].ds_addr;
		bus_size_t len = bi->dmamap->dm_segs[i].ds_len;
		bus_size_t off = addr & VCPAGE_OFFSET;
		int npgs = ((off + len + VCPAGE_OFFSET) >> VCPAGE_SHIFT);

		/* Entry format: page-aligned address | (page run length - 1). */
		pagelist->addrs[i] = htole32((addr & ~VCPAGE_OFFSET) | (npgs - 1));
	}

	/* Partial cache lines (fragments) require special measures */
	if ((le16toh(pagelist->type) == PAGELIST_READ) &&
	    ((le16toh(pagelist->offset) & (g_cache_line_size - 1)) ||
	    ((le16toh(pagelist->offset) + le32toh(pagelist->length)) & (g_cache_line_size - 1)))) {
		char *fragments;

		/* Wait for a free fragment; bail if interrupted.
		 * NOTE(review): this goto fail7 destroys bi->dmamap without
		 * a preceding bus_dmamap_unload even though the map was
		 * loaded above — verify against bus_dma(9). */
		if (down_interruptible(&g_free_fragments_sema) != 0) {
			goto fail7;
		}

		WARN_ON(g_free_fragments == NULL);

		down(&g_free_fragments_mutex);
		fragments = g_free_fragments;
		WARN_ON(fragments == NULL);
		g_free_fragments = *(char **) g_free_fragments;
		up(&g_free_fragments_mutex);
		/* Encode the fragment index into the pagelist type. */
		pagelist->type = htole16(PAGELIST_READ_WITH_FRAGMENTS +
		    (fragments - g_fragments_base) / g_fragments_size);
		/* NOTE(review): sizeof(*fragments) is 1 byte; the matching
		 * sync in vchiq_complete_bulk uses g_fragments_size, and the
		 * offset is relative to g_fragments_base rather than the
		 * start of dma_map — confirm both before trusting this sync. */
		bus_dmamap_sync(dma_tag, dma_map,
		    (char *)fragments - g_fragments_base, sizeof(*fragments),
		    BUS_DMASYNC_PREREAD);
	}

	/*
	 * Store the BULKINFO_T address in remote_data, which isn't used by the
	 * slave.
	 */
	bulk->remote_data = bi;

	bus_dmamap_sync(dma_tag, bi->pagelist_map, 0,
	    bi->pagelist_size, BUS_DMASYNC_PREWRITE);

	bus_dmamap_sync(dma_tag, bi->dmamap, 0, bi->size,
	    le16toh(pagelist->type) == PAGELIST_WRITE ?
	    BUS_DMASYNC_PREWRITE : BUS_DMASYNC_PREREAD);

	return VCHIQ_SUCCESS;

	/* Unwind in reverse acquisition order. */
fail7:
	bus_dmamap_destroy(dma_tag, bi->dmamap);

fail6:
	if (IS_USER_ADDRESS(bi->buf))
		uvm_vsunlock(curproc->p_vmspace, bi->buf, bi->size);

fail5:
	bus_dmamap_unload(dma_tag, bi->pagelist_map);

fail4:
	bus_dmamap_destroy(dma_tag, bi->pagelist_map);

fail3:
	bus_dmamem_unmap(dma_tag, bi->pagelist, bi->pagelist_size);

fail2:
	bus_dmamem_free(dma_tag, bi->pagelist_sgs,
	    __arraycount(bi->pagelist_sgs));

fail1:
	kmem_free(bi, sizeof(*bi));
	return VCHIQ_ERROR;
}

/*
 * Finish a bulk transfer: sync the DMA maps, copy any head/tail
 * fragment bytes back into the caller's buffer (via copyout_proc for
 * user buffers, kcopy for kernel ones), return the fragment to the
 * free list, and release every resource taken in
 * vchiq_prepare_bulk_data.
 */
void
vchiq_complete_bulk(VCHIQ_BULK_T *bulk)
{
	/* Nothing to do unless the transfer actually moved data. */
	if (bulk && bulk->remote_data && bulk->actual) {
		int actual = bulk->actual;
		BULKINFO_T *bi = bulk->remote_data;
		PAGELIST_T *pagelist = bi->pagelist;

		vchiq_log_trace(vchiq_arm_log_level,
		    "free_pagelist - %p, %d", pagelist, actual);

		bus_dmamap_sync(dma_tag, bi->pagelist_map, 0,
		    bi->pagelist_size, BUS_DMASYNC_POSTWRITE);

		bus_dmamap_sync(dma_tag, bi->dmamap, 0, bi->size,
		    le16toh(pagelist->type) == PAGELIST_WRITE ?
		    BUS_DMASYNC_POSTWRITE : BUS_DMASYNC_POSTREAD);

		/* Deal with any partial cache lines (fragments) */
		if (le16toh(pagelist->type) >= PAGELIST_READ_WITH_FRAGMENTS) {
			/* Recover the fragment address from the type field. */
			char *fragments = g_fragments_base +
			    (le16toh(pagelist->type) - PAGELIST_READ_WITH_FRAGMENTS) *
			    g_fragments_size;
			int head_bytes, tail_bytes;

			bus_dmamap_sync(dma_tag, dma_map,
			    (char *)fragments - g_fragments_base, g_fragments_size,
			    BUS_DMASYNC_POSTREAD);

			head_bytes = (g_cache_line_size - le16toh(pagelist->offset)) &
			    (g_cache_line_size - 1);
			tail_bytes = (le16toh(pagelist->offset) + actual) &
			    (g_cache_line_size - 1);

			/* First cache line of the fragment holds the head. */
			if ((actual >= 0) && (head_bytes != 0)) {
				if (head_bytes > actual)
					head_bytes = actual;

				if (IS_USER_ADDRESS(bi->buf)) {
					copyout_proc(bi->proc, fragments,
					    bi->buf, head_bytes);
				} else {
					kcopy(fragments, bi->buf, head_bytes);
				}
			}
			/* Second cache line of the fragment holds the tail. */
			if ((actual >= 0) && (head_bytes < actual) &&
			    (tail_bytes != 0)) {
				void *t = (char *)bi->buf + bi->size -
				    tail_bytes;

				if (IS_USER_ADDRESS(bi->buf)) {
					copyout_proc(bi->proc,
					    fragments + g_cache_line_size, t,
					    tail_bytes);
				} else {
					kcopy(fragments + g_cache_line_size, t,
					    tail_bytes);
				}
			}

			/* Push the fragment back on the free list and wake
			 * any waiter in vchiq_prepare_bulk_data. */
			down(&g_free_fragments_mutex);
			*(char **)fragments = g_free_fragments;
			g_free_fragments = fragments;
			up(&g_free_fragments_mutex);
			up(&g_free_fragments_sema);
		}
		bus_dmamap_unload(dma_tag, bi->dmamap);
		bus_dmamap_destroy(dma_tag, bi->dmamap);
		if (IS_USER_ADDRESS(bi->buf))
			uvm_vsunlock(bi->proc->p_vmspace, bi->buf, bi->size);

		bus_dmamap_unload(dma_tag, bi->pagelist_map);
		bus_dmamap_destroy(dma_tag, bi->pagelist_map);
		bus_dmamem_unmap(dma_tag, bi->pagelist,
		    bi->pagelist_size);
		bus_dmamem_free(dma_tag, bi->pagelist_sgs,
		    __arraycount(bi->pagelist_sgs));
		kmem_free(bi, sizeof(*bi));
	}
}

void
vchiq_transfer_bulk(VCHIQ_BULK_T *bulk)
{
	/*
	 * This should only be called on the master (VideoCore) side, but
	 * provide an implementation to avoid the need for ifdefery.
	 */
	BUG();
}

/*
 * Append a one-line platform description to the dump context.
 */
void
vchiq_dump_platform_state(void *dump_context)
{
	char buf[80];
	int len;
	len = snprintf(buf, sizeof(buf),
	    "  Platform: 2835 (VC master)");
	vchiq_dump(dump_context, buf, len + 1);
}

/* Suspend is not supported on this platform. */
VCHIQ_STATUS_T
vchiq_platform_suspend(VCHIQ_STATE_T *state)
{
	return VCHIQ_ERROR;
}

VCHIQ_STATUS_T
vchiq_platform_resume(VCHIQ_STATE_T *state)
{
	return VCHIQ_SUCCESS;
}

void
vchiq_platform_paused(VCHIQ_STATE_T *state)
{
}

void
vchiq_platform_resumed(VCHIQ_STATE_T *state)
{
}

int
vchiq_platform_videocore_wanted(VCHIQ_STATE_T* state)
{
	return 1; // autosuspend not supported - videocore always wanted
}

int
vchiq_platform_use_suspend_timer(void)
{
	return 0;
}
void
vchiq_dump_platform_use_state(VCHIQ_STATE_T *state)
{
	vchiq_log_info(vchiq_arm_log_level, "Suspend timer not in use");
}
void
vchiq_platform_handle_timeout(VCHIQ_STATE_T *state)
{
	(void)state;
}