radeon_uvd.c revision 1.2
      1 /*
      2  * Copyright 2011 Advanced Micro Devices, Inc.
      3  * All Rights Reserved.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the
      7  * "Software"), to deal in the Software without restriction, including
      8  * without limitation the rights to use, copy, modify, merge, publish,
      9  * distribute, sub license, and/or sell copies of the Software, and to
     10  * permit persons to whom the Software is furnished to do so, subject to
     11  * the following conditions:
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     20  *
     21  * The above copyright notice and this permission notice (including the
     22  * next paragraph) shall be included in all copies or substantial portions
     23  * of the Software.
     24  *
     25  */
     26 /*
     27  * Authors:
     28  *    Christian König <deathsimple@vodafone.de>
     29  */
     30 
     31 #include <linux/firmware.h>
     32 #include <linux/module.h>
     33 #include <drm/drmP.h>
     34 #include <drm/drm.h>
     35 
     36 #include "radeon.h"
     37 #include "r600d.h"
     38 
     39 /* 1 second timeout */
     40 #define UVD_IDLE_TIMEOUT_MS	1000
     41 
     42 /* Firmware Names */
     43 #define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
     44 #define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
     45 #define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
     46 #define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
     47 #define FIRMWARE_BONAIRE	"radeon/BONAIRE_uvd.bin"
     48 
     49 MODULE_FIRMWARE(FIRMWARE_RV710);
     50 MODULE_FIRMWARE(FIRMWARE_CYPRESS);
     51 MODULE_FIRMWARE(FIRMWARE_SUMO);
     52 MODULE_FIRMWARE(FIRMWARE_TAHITI);
     53 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
     54 
     55 static void radeon_uvd_idle_work_handler(struct work_struct *work);
     56 
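        /**
         * radeon_uvd_init - load firmware and set up the UVD block
         *
         * @rdev: radeon_device pointer
         *
         * Request the UVD firmware matching the chip family, allocate and pin
         * the VCPU buffer object in VRAM, map it and clear the session handle
         * bookkeeping.  Returns 0 on success or a negative error code.
         */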
     57 int radeon_uvd_init(struct radeon_device *rdev)
     58 {
     59 	unsigned long bo_size;
     60 	const char *fw_name;
     61 	int i, r;
     62 
     63 	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);
     64 
     65 	switch (rdev->family) {
     66 	case CHIP_RV710:
     67 	case CHIP_RV730:
     68 	case CHIP_RV740:
     69 		fw_name = FIRMWARE_RV710;
     70 		break;
     71 
     72 	case CHIP_CYPRESS:
     73 	case CHIP_HEMLOCK:
     74 	case CHIP_JUNIPER:
     75 	case CHIP_REDWOOD:
     76 	case CHIP_CEDAR:
     77 		fw_name = FIRMWARE_CYPRESS;
     78 		break;
     79 
     80 	case CHIP_SUMO:
     81 	case CHIP_SUMO2:
     82 	case CHIP_PALM:
     83 	case CHIP_CAYMAN:
     84 	case CHIP_BARTS:
     85 	case CHIP_TURKS:
     86 	case CHIP_CAICOS:
     87 		fw_name = FIRMWARE_SUMO;
     88 		break;
     89 
     90 	case CHIP_TAHITI:
     91 	case CHIP_VERDE:
     92 	case CHIP_PITCAIRN:
     93 	case CHIP_ARUBA:
     94 	case CHIP_OLAND:
     95 		fw_name = FIRMWARE_TAHITI;
     96 		break;
     97 
     98 	case CHIP_BONAIRE:
     99 	case CHIP_KABINI:
    100 	case CHIP_KAVERI:
    101 	case CHIP_HAWAII:
    102 	case CHIP_MULLINS:
    103 		fw_name = FIRMWARE_BONAIRE;
    104 		break;
    105 
    106 	default:
    107 		return -EINVAL;
    108 	}
    109 
    110 	r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
    111 	if (r) {
    112 		dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
    113 			fw_name);
    114 		return r;
    115 	}
    116 
    117 	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
    118 		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE;
    119 	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
    120 			     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo);
    121 	if (r) {
    122 		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
    123 		return r;
    124 	}
    125 
    126 	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
    127 	if (r) {
    128 		radeon_bo_unref(&rdev->uvd.vcpu_bo);
    129 		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
    130 		return r;
    131 	}
    132 
    133 	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
    134 			  &rdev->uvd.gpu_addr);
    135 	if (r) {
    136 		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
    137 		radeon_bo_unref(&rdev->uvd.vcpu_bo);
    138 		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
    139 		return r;
    140 	}
    141 
    142 	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
    143 	if (r) {
    144 		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
    145 		return r;
    146 	}
    147 
    148 	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
    149 
    150 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
    151 		atomic_set(&rdev->uvd.handles[i], 0);
    152 		rdev->uvd.filp[i] = NULL;
    153 		rdev->uvd.img_size[i] = 0;
    154 	}
    155 
    156 	return 0;
    157 }
    158 
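        /**
         * radeon_uvd_fini - tear down the UVD block
         *
         * @rdev: radeon_device pointer
         *
         * Unmap, unpin and free the VCPU buffer object, clean up the UVD ring
         * and release the firmware image.
         */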
    159 void radeon_uvd_fini(struct radeon_device *rdev)
    160 {
    161 	int r;
    162 
    163 	if (rdev->uvd.vcpu_bo == NULL)
    164 		return;
    165 
    166 	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
    167 	if (!r) {
    168 		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
    169 		radeon_bo_unpin(rdev->uvd.vcpu_bo);
    170 		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
    171 	}
    172 
    173 	radeon_bo_unref(&rdev->uvd.vcpu_bo);
    174 
    175 	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);
    176 
    177 	release_firmware(rdev->uvd_fw);
    178 }
    179 
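        /**
         * radeon_uvd_suspend - save UVD state before suspend
         *
         * @rdev: radeon_device pointer
         *
         * If any session handles are still open, save the part of the VCPU
         * buffer object that follows the firmware image so it can be restored
         * on resume.
         */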
    180 int radeon_uvd_suspend(struct radeon_device *rdev)
    181 {
    182 	unsigned size;
    183 	uint8_t *ptr;
    184 	int i;
    185 
    186 	if (rdev->uvd.vcpu_bo == NULL)
    187 		return 0;
    188 
    189 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
    190 		if (atomic_read(&rdev->uvd.handles[i]))
    191 			break;
    192 
    193 	if (i == RADEON_MAX_UVD_HANDLES)
    194 		return 0;
    195 
    196 	size = radeon_bo_size(rdev->uvd.vcpu_bo);
    197 	size -= rdev->uvd_fw->size;
    198 
    199 	ptr = rdev->uvd.cpu_addr;
    200 	ptr += rdev->uvd_fw->size;
    201 
    202 	rdev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
        	if (rdev->uvd.saved_bo == NULL)
        		return -ENOMEM;
    203 	memcpy(rdev->uvd.saved_bo, ptr, size);
    204 
    205 	return 0;
    206 }
    207 
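        /**
         * radeon_uvd_resume - restore UVD state after resume
         *
         * @rdev: radeon_device pointer
         *
         * Re-upload the firmware image into the VCPU buffer object and restore
         * the saved session state, or zero it if nothing was saved.
         */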
    208 int radeon_uvd_resume(struct radeon_device *rdev)
    209 {
    210 	unsigned size;
    211 	uint8_t *ptr;
    212 
    213 	if (rdev->uvd.vcpu_bo == NULL)
    214 		return -EINVAL;
    215 
    216 	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
    217 
    218 	size = radeon_bo_size(rdev->uvd.vcpu_bo);
    219 	size -= rdev->uvd_fw->size;
    220 
    221 	ptr = rdev->uvd.cpu_addr;
    222 	ptr += rdev->uvd_fw->size;
    223 
    224 	if (rdev->uvd.saved_bo != NULL) {
    225 		memcpy(ptr, rdev->uvd.saved_bo, size);
    226 		kfree(rdev->uvd.saved_bo);
    227 		rdev->uvd.saved_bo = NULL;
    228 	} else
    229 		memset(ptr, 0, size);
    230 
    231 	return 0;
    232 }
    233 
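        /*
         * Limit the placement of a buffer object to the first 256MB of VRAM
         * so that it ends up in the segment the UVD engine works with.
         */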
    234 void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
    235 {
    236 	rbo->placement.fpfn = 0 >> PAGE_SHIFT;
    237 	rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
    238 }
    239 
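        /**
         * radeon_uvd_free_handles - destroy all sessions owned by a client
         *
         * @rdev: radeon_device pointer
         * @filp: drm file the sessions belong to
         *
         * Send a destroy message for every open handle created through @filp,
         * wait for it to complete and free the handle slot.
         */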
    240 void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
    241 {
    242 	int i, r;
    243 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
    244 		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
    245 		if (handle != 0 && rdev->uvd.filp[i] == filp) {
    246 			struct radeon_fence *fence;
    247 
    248 			radeon_uvd_note_usage(rdev);
    249 
    250 			r = radeon_uvd_get_destroy_msg(rdev,
    251 				R600_RING_TYPE_UVD_INDEX, handle, &fence);
    252 			if (r) {
    253 				DRM_ERROR("Error destroying UVD (%d)!\n", r);
    254 				continue;
    255 			}
    256 
    257 			radeon_fence_wait(fence, false);
    258 			radeon_fence_unref(&fence);
    259 
    260 			rdev->uvd.filp[i] = NULL;
    261 			atomic_set(&rdev->uvd.handles[i], 0);
    262 		}
    263 	}
    264 }
    265 
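        /*
         * Validate a decode message: compute the minimum DPB and image sizes
         * for the codec and resolution given in the message, sanity check the
         * supplied pitch and DPB size, and record the sizes for the later
         * relocation checks.
         */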
    266 static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
    267 {
    268 	unsigned stream_type = msg[4];
    269 	unsigned width = msg[6];
    270 	unsigned height = msg[7];
    271 	unsigned dpb_size = msg[9];
    272 	unsigned pitch = msg[28];
    273 
    274 	unsigned width_in_mb = width / 16;
    275 #ifdef __NetBSD__		/* XXX ALIGN means something else.  */
    276 	unsigned height_in_mb = round_up(height / 16, 2);
    277 #else
    278 	unsigned height_in_mb = ALIGN(height / 16, 2);
    279 #endif
    280 
    281 	unsigned image_size, tmp, min_dpb_size;
    282 
    283 	image_size = width * height;
    284 	image_size += image_size / 2;
    285 #ifdef __NetBSD__		/* XXX ALIGN means something else.  */
    286 	image_size = round_up(image_size, 1024);
    287 #else
    288 	image_size = ALIGN(image_size, 1024);
    289 #endif
    290 
    291 	switch (stream_type) {
    292 	case 0: /* H264 */
    293 
    294 		/* reference picture buffer */
    295 		min_dpb_size = image_size * 17;
    296 
    297 		/* macroblock context buffer */
    298 		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;
    299 
    300 		/* IT surface buffer */
    301 		min_dpb_size += width_in_mb * height_in_mb * 32;
    302 		break;
    303 
    304 	case 1: /* VC1 */
    305 
    306 		/* reference picture buffer */
    307 		min_dpb_size = image_size * 3;
    308 
    309 		/* CONTEXT_BUFFER */
    310 		min_dpb_size += width_in_mb * height_in_mb * 128;
    311 
    312 		/* IT surface buffer */
    313 		min_dpb_size += width_in_mb * 64;
    314 
    315 		/* DB surface buffer */
    316 		min_dpb_size += width_in_mb * 128;
    317 
    318 		/* BP */
    319 		tmp = max(width_in_mb, height_in_mb);
    320 #ifdef __NetBSD__		/* XXX ALIGN means something else.  */
    321 		min_dpb_size += round_up(tmp * 7 * 16, 64);
    322 #else
    323 		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
    324 #endif
    325 		break;
    326 
    327 	case 3: /* MPEG2 */
    328 
    329 		/* reference picture buffer */
    330 		min_dpb_size = image_size * 3;
    331 		break;
    332 
    333 	case 4: /* MPEG4 */
    334 
    335 		/* reference picture buffer */
    336 		min_dpb_size = image_size * 3;
    337 
    338 		/* CM */
    339 		min_dpb_size += width_in_mb * height_in_mb * 64;
    340 
    341 		/* IT surface buffer */
    342 #ifdef __NetBSD__		/* XXX ALIGN means something else.  */
    343 		min_dpb_size += round_up(width_in_mb * height_in_mb * 32, 64);
    344 #else
    345 		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
    346 #endif
    347 		break;
    348 
    349 	default:
    350 		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
    351 		return -EINVAL;
    352 	}
    353 
    354 	if (width > pitch) {
    355 		DRM_ERROR("Invalid UVD decoding target pitch!\n");
    356 		return -EINVAL;
    357 	}
    358 
    359 	if (dpb_size < min_dpb_size) {
    360 		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
    361 			  dpb_size, min_dpb_size);
    362 		return -EINVAL;
    363 	}
    364 
    365 	buf_sizes[0x1] = dpb_size;
    366 	buf_sizes[0x2] = image_size;
    367 	return 0;
    368 }
    369 
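        /*
         * Wait for the message buffer to be idle, map it and handle the
         * message: validate decode messages, release handles on destroy
         * messages, and look up or allocate a handle for create/decode
         * messages.
         */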
    370 static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
    371 			     unsigned offset, unsigned buf_sizes[])
    372 {
    373 	int32_t *msg, msg_type, handle;
    374 	unsigned img_size = 0;
    375 	void *ptr;
    376 
    377 	int i, r;
    378 
    379 	if (offset & 0x3F) {
    380 		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
    381 		return -EINVAL;
    382 	}
    383 
    384 	if (bo->tbo.sync_obj) {
    385 		r = radeon_fence_wait(bo->tbo.sync_obj, false);
    386 		if (r) {
    387 			DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
    388 			return r;
    389 		}
    390 	}
    391 
    392 	r = radeon_bo_kmap(bo, &ptr);
    393 	if (r) {
    394 		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
    395 		return r;
    396 	}
    397 
    398 	msg = (int32_t *)((uint8_t *)ptr + offset);
    399 
    400 	msg_type = msg[1];
    401 	handle = msg[2];
    402 
    403 	if (handle == 0) {
    404 		DRM_ERROR("Invalid UVD handle!\n");
        		radeon_bo_kunmap(bo);
    405 		return -EINVAL;
    406 	}
    407 
    408 	if (msg_type == 1) {
    409 		/* it's a decode msg, calc buffer sizes */
    410 		r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
    411 		/* calc image size (width * height) */
    412 		img_size = msg[6] * msg[7];
    413 		radeon_bo_kunmap(bo);
    414 		if (r)
    415 			return r;
    416 
    417 	} else if (msg_type == 2) {
    418 		/* it's a destroy msg, free the handle */
    419 		for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
    420 			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
    421 		radeon_bo_kunmap(bo);
    422 		return 0;
    423 	} else {
    424 		/* it's a create msg, calc image size (width * height) */
    425 		img_size = msg[7] * msg[8];
    426 		radeon_bo_kunmap(bo);
    427 
    428 		if (msg_type != 0) {
    429 			DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
    430 			return -EINVAL;
    431 		}
    432 
    433 		/* it's a create msg, no special handling needed */
    434 	}
    435 
    436 	/* create or decode, validate the handle */
    437 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
    438 		if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
    439 			return 0;
    440 	}
    441 
    442 	/* handle not found, try to alloc a new one */
    443 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
    444 		if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
    445 			p->rdev->uvd.filp[i] = p->filp;
    446 			p->rdev->uvd.img_size[i] = img_size;
    447 			return 0;
    448 		}
    449 	}
    450 
    451 	DRM_ERROR("No more free UVD handles!\n");
    452 	return -EINVAL;
    453 }
    454 
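        /*
         * Resolve a relocation for a UVD command: patch the 64-bit buffer
         * address into the IB, check the buffer size against the per-command
         * minimum, reject buffers crossing a 256MB boundary and make sure
         * msg/fb buffers stay in the same 256MB segment as the VCPU bo.
         * A command 0 reloc also triggers parsing of the UVD message.
         */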
    455 static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
    456 			       int data0, int data1,
    457 			       unsigned buf_sizes[], bool *has_msg_cmd)
    458 {
    459 	struct radeon_cs_chunk *relocs_chunk;
    460 	struct radeon_cs_reloc *reloc;
    461 	unsigned idx, cmd, offset;
    462 	uint64_t start, end;
    463 	int r;
    464 
    465 	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
    466 	offset = radeon_get_ib_value(p, data0);
    467 	idx = radeon_get_ib_value(p, data1);
    468 	if (idx >= relocs_chunk->length_dw) {
    469 		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
    470 			  idx, relocs_chunk->length_dw);
    471 		return -EINVAL;
    472 	}
    473 
    474 	reloc = p->relocs_ptr[(idx / 4)];
    475 	start = reloc->gpu_offset;
    476 	end = start + radeon_bo_size(reloc->robj);
    477 	start += offset;
    478 
    479 	p->ib.ptr[data0] = start & 0xFFFFFFFF;
    480 	p->ib.ptr[data1] = start >> 32;
    481 
    482 	cmd = radeon_get_ib_value(p, p->idx) >> 1;
    483 
    484 	if (cmd < 0x4) {
    485 		if (end <= start) {
    486 			DRM_ERROR("invalid reloc offset %X!\n", offset);
    487 			return -EINVAL;
    488 		}
    489 		if ((end - start) < buf_sizes[cmd]) {
    490 			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
    491 				  (unsigned)(end - start), buf_sizes[cmd]);
    492 			return -EINVAL;
    493 		}
    494 
    495 	} else if (cmd != 0x100) {
    496 		DRM_ERROR("invalid UVD command %X!\n", cmd);
    497 		return -EINVAL;
    498 	}
    499 
    500 	if ((start >> 28) != ((end - 1) >> 28)) {
    501 		DRM_ERROR("reloc %"PRIX64"-%"PRIX64" crossing 256MB boundary!\n",
    502 			  start, end);
    503 		return -EINVAL;
    504 	}
    505 
    506 	/* TODO: is this still necessary on NI+ ? */
    507 	if ((cmd == 0 || cmd == 0x3) &&
    508 	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
    509 		DRM_ERROR("msg/fb buffer %"PRIX64"-%"PRIX64" out of 256MB segment!\n",
    510 			  start, end);
    511 		return -EINVAL;
    512 	}
    513 
    514 	if (cmd == 0) {
    515 		if (*has_msg_cmd) {
    516 			DRM_ERROR("More than one message in a UVD-IB!\n");
    517 			return -EINVAL;
    518 		}
    519 		*has_msg_cmd = true;
    520 		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
    521 		if (r)
    522 			return r;
    523 	} else if (!*has_msg_cmd) {
    524 		DRM_ERROR("Message needed before other commands are sent!\n");
    525 		return -EINVAL;
    526 	}
    527 
    528 	return 0;
    529 }
    530 
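        /*
         * Walk the registers written by a type-0 packet, remembering the
         * DATA0/DATA1 address words and triggering the relocation check when
         * the command register is written.
         */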
    531 static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
    532 			     struct radeon_cs_packet *pkt,
    533 			     int *data0, int *data1,
    534 			     unsigned buf_sizes[],
    535 			     bool *has_msg_cmd)
    536 {
    537 	int i, r;
    538 
    539 	p->idx++;
    540 	for (i = 0; i <= pkt->count; ++i) {
    541 		switch (pkt->reg + i*4) {
    542 		case UVD_GPCOM_VCPU_DATA0:
    543 			*data0 = p->idx;
    544 			break;
    545 		case UVD_GPCOM_VCPU_DATA1:
    546 			*data1 = p->idx;
    547 			break;
    548 		case UVD_GPCOM_VCPU_CMD:
    549 			r = radeon_uvd_cs_reloc(p, *data0, *data1,
    550 						buf_sizes, has_msg_cmd);
    551 			if (r)
    552 				return r;
    553 			break;
    554 		case UVD_ENGINE_CNTL:
    555 			break;
    556 		default:
    557 			DRM_ERROR("Invalid reg 0x%X!\n",
    558 				  pkt->reg + i*4);
    559 			return -EINVAL;
    560 		}
    561 		p->idx++;
    562 	}
    563 	return 0;
    564 }
    565 
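        /**
         * radeon_uvd_cs_parse - parse and validate a UVD command stream
         *
         * @p: parser context
         *
         * Walk all packets of the IB, validating registers, relocations and
         * the embedded UVD message.  Exactly one message command is required
         * per IB.
         */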
    566 int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
    567 {
    568 	struct radeon_cs_packet pkt;
    569 	int r, data0 = 0, data1 = 0;
    570 
    571 	/* does the IB have a msg command? */
    572 	bool has_msg_cmd = false;
    573 
    574 	/* minimum buffer sizes */
    575 	unsigned buf_sizes[] = {
    576 		[0x00000000]	=	2048,
    577 		[0x00000001]	=	32 * 1024 * 1024,
    578 		[0x00000002]	=	2048 * 1152 * 3,
    579 		[0x00000003]	=	2048,
    580 	};
    581 
    582 	if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
    583 		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
    584 			  p->chunks[p->chunk_ib_idx].length_dw);
    585 		return -EINVAL;
    586 	}
    587 
    588 	if (p->chunk_relocs_idx == -1) {
    589 		DRM_ERROR("No relocation chunk !\n");
    590 		return -EINVAL;
    591 	}
    592 
    593 
    594 	do {
    595 		r = radeon_cs_packet_parse(p, &pkt, p->idx);
    596 		if (r)
    597 			return r;
    598 		switch (pkt.type) {
    599 		case RADEON_PACKET_TYPE0:
    600 			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
    601 					      buf_sizes, &has_msg_cmd);
    602 			if (r)
    603 				return r;
    604 			break;
    605 		case RADEON_PACKET_TYPE2:
    606 			p->idx += pkt.count + 2;
    607 			break;
    608 		default:
    609 			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
    610 			return -EINVAL;
    611 		}
    612 	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
    613 
    614 	if (!has_msg_cmd) {
    615 		DRM_ERROR("UVD-IBs need a msg command!\n");
    616 		return -EINVAL;
    617 	}
    618 
    619 	return 0;
    620 }
    621 
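        /*
         * Submit a message buffer to the VCPU: reserve and validate the bo in
         * the UVD segment, build a small IB that points the engine at it,
         * schedule the IB and optionally return its fence.
         */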
    622 static int radeon_uvd_send_msg(struct radeon_device *rdev,
    623 			       int ring, struct radeon_bo *bo,
    624 			       struct radeon_fence **fence)
    625 {
    626 	struct ttm_validate_buffer tv;
    627 	struct ww_acquire_ctx ticket;
    628 	struct list_head head;
    629 	struct radeon_ib ib;
    630 	uint64_t addr;
    631 	int i, r;
    632 
    633 	memset(&tv, 0, sizeof(tv));
    634 	tv.bo = &bo->tbo;
    635 
    636 	INIT_LIST_HEAD(&head);
    637 	list_add(&tv.head, &head);
    638 
    639 	r = ttm_eu_reserve_buffers(&ticket, &head);
    640 	if (r)
    641 		return r;
    642 
    643 	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM);
    644 	radeon_uvd_force_into_uvd_segment(bo);
    645 
    646 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
    647 	if (r)
    648 		goto err;
    649 
    650 	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
    651 	if (r)
    652 		goto err;
    653 
    654 	addr = radeon_bo_gpu_offset(bo);
    655 	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
    656 	ib.ptr[1] = addr;
    657 	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
    658 	ib.ptr[3] = addr >> 32;
    659 	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
    660 	ib.ptr[5] = 0;
    661 	for (i = 6; i < 16; ++i)
    662 		ib.ptr[i] = PACKET2(0);
    663 	ib.length_dw = 16;
    664 
    665 	r = radeon_ib_schedule(rdev, &ib, NULL);
    666 	if (r)
    667 		goto err;
    668 	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
    669 
    670 	if (fence)
    671 		*fence = radeon_fence_ref(ib.fence);
    672 
    673 	radeon_ib_free(rdev, &ib);
    674 	radeon_bo_unref(&bo);
    675 	return 0;
    676 
    677 err:
    678 	ttm_eu_backoff_reservation(&ticket, &head);
    679 	return r;
    680 }
    681 
    682 /* multiple fence commands without any stream commands in between can
    683    crash the vcpu so just try to emit a dummy create/destroy msg to
    684    avoid this */
    685 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
    686 			      uint32_t handle, struct radeon_fence **fence)
    687 {
    688 	struct radeon_bo *bo;
    689 	uint32_t *msg;
    690 	int r, i;
    691 
    692 	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
    693 			     RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
    694 	if (r)
    695 		return r;
    696 
    697 	r = radeon_bo_reserve(bo, false);
    698 	if (r) {
    699 		radeon_bo_unref(&bo);
    700 		return r;
    701 	}
    702 
    703 	r = radeon_bo_kmap(bo, (void **)&msg);
    704 	if (r) {
    705 		radeon_bo_unreserve(bo);
    706 		radeon_bo_unref(&bo);
    707 		return r;
    708 	}
    709 
    710 	/* stitch together an UVD create msg */
    711 	msg[0] = cpu_to_le32(0x00000de4);
    712 	msg[1] = cpu_to_le32(0x00000000);
    713 	msg[2] = cpu_to_le32(handle);
    714 	msg[3] = cpu_to_le32(0x00000000);
    715 	msg[4] = cpu_to_le32(0x00000000);
    716 	msg[5] = cpu_to_le32(0x00000000);
    717 	msg[6] = cpu_to_le32(0x00000000);
    718 	msg[7] = cpu_to_le32(0x00000780);
    719 	msg[8] = cpu_to_le32(0x00000440);
    720 	msg[9] = cpu_to_le32(0x00000000);
    721 	msg[10] = cpu_to_le32(0x01b37000);
    722 	for (i = 11; i < 1024; ++i)
    723 		msg[i] = cpu_to_le32(0x0);
    724 
    725 	radeon_bo_kunmap(bo);
    726 	radeon_bo_unreserve(bo);
    727 
    728 	return radeon_uvd_send_msg(rdev, ring, bo, fence);
    729 }
    730 
    731 int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
    732 			       uint32_t handle, struct radeon_fence **fence)
    733 {
    734 	struct radeon_bo *bo;
    735 	uint32_t *msg;
    736 	int r, i;
    737 
    738 	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
    739 			     RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
    740 	if (r)
    741 		return r;
    742 
    743 	r = radeon_bo_reserve(bo, false);
    744 	if (r) {
    745 		radeon_bo_unref(&bo);
    746 		return r;
    747 	}
    748 
    749 	r = radeon_bo_kmap(bo, (void **)&msg);
    750 	if (r) {
    751 		radeon_bo_unreserve(bo);
    752 		radeon_bo_unref(&bo);
    753 		return r;
    754 	}
    755 
    756 	/* stitch together an UVD destroy msg */
    757 	msg[0] = cpu_to_le32(0x00000de4);
    758 	msg[1] = cpu_to_le32(0x00000002);
    759 	msg[2] = cpu_to_le32(handle);
    760 	msg[3] = cpu_to_le32(0x00000000);
    761 	for (i = 4; i < 1024; ++i)
    762 		msg[i] = cpu_to_le32(0x0);
    763 
    764 	radeon_bo_kunmap(bo);
    765 	radeon_bo_unreserve(bo);
    766 
    767 	return radeon_uvd_send_msg(rdev, ring, bo, fence);
    768 }
    769 
    770 /**
    771  * radeon_uvd_count_handles - count number of open streams
    772  *
    773  * @rdev: radeon_device pointer
    774  * @sd: number of SD streams
    775  * @hd: number of HD streams
    776  *
    777  * Count the number of open SD/HD streams as a hint for power management
    778  */
    779 static void radeon_uvd_count_handles(struct radeon_device *rdev,
    780 				     unsigned *sd, unsigned *hd)
    781 {
    782 	unsigned i;
    783 
    784 	*sd = 0;
    785 	*hd = 0;
    786 
    787 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
    788 		if (!atomic_read(&rdev->uvd.handles[i]))
    789 			continue;
    790 
    791 		if (rdev->uvd.img_size[i] >= 720*576)
    792 			++(*hd);
    793 		else
    794 			++(*sd);
    795 	}
    796 }
    797 
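        /*
         * Delayed work handler: once no UVD fences are outstanding, power down
         * the block via DPM or drop the UVD clocks; otherwise re-arm the idle
         * timer.
         */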
    798 static void radeon_uvd_idle_work_handler(struct work_struct *work)
    799 {
    800 	struct radeon_device *rdev =
    801 		container_of(work, struct radeon_device, uvd.idle_work.work);
    802 
    803 	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
    804 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
    805 			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
    806 						 &rdev->pm.dpm.hd);
    807 			radeon_dpm_enable_uvd(rdev, false);
    808 		} else {
    809 			radeon_set_uvd_clocks(rdev, 0, 0);
    810 		}
    811 	} else {
    812 		schedule_delayed_work(&rdev->uvd.idle_work,
    813 				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
    814 	}
    815 }
    816 
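        /**
         * radeon_uvd_note_usage - mark the UVD block as busy
         *
         * @rdev: radeon_device pointer
         *
         * Called when UVD is about to be used: (re)arm the idle work and, if
         * the block was idle or the number of streams changed, raise the UVD
         * clocks or notify DPM.
         */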
    817 void radeon_uvd_note_usage(struct radeon_device *rdev)
    818 {
    819 	bool streams_changed = false;
    820 	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
    821 	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
    822 					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
    823 
    824 	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
    825 		unsigned hd = 0, sd = 0;
    826 		radeon_uvd_count_handles(rdev, &sd, &hd);
    827 		if ((rdev->pm.dpm.sd != sd) ||
    828 		    (rdev->pm.dpm.hd != hd)) {
    829 			rdev->pm.dpm.sd = sd;
    830 			rdev->pm.dpm.hd = hd;
    831 			streams_changed = true;
    832 		}
    833 	}
    834 
    835 	if (set_clocks || streams_changed) {
    836 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
    837 			radeon_dpm_enable_uvd(rdev, true);
    838 		} else {
    839 			radeon_set_uvd_clocks(rdev, 53300, 40000);
    840 		}
    841 	}
    842 }
    843 
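        /*
         * Pick a post divider for the given VCO and target frequency: clamp it
         * to pd_min, bump it if the resulting frequency would exceed the
         * target, and make it even when above pd_even.
         */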
    844 static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
    845 					      unsigned target_freq,
    846 					      unsigned pd_min,
    847 					      unsigned pd_even)
    848 {
    849 	unsigned post_div = vco_freq / target_freq;
    850 
    851 	/* adjust to post divider minimum value */
    852 	if (post_div < pd_min)
    853 		post_div = pd_min;
    854 
    855 	/* we always need a frequency less than or equal to the target */
    856 	if ((vco_freq / post_div) > target_freq)
    857 		post_div += 1;
    858 
    859 	/* post dividers above a certain value must be even */
    860 	if (post_div > pd_even && post_div % 2)
    861 		post_div += 1;
    862 
    863 	return post_div;
    864 }
    865 
    866 /**
    867  * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
    868  *
    869  * @rdev: radeon_device pointer
    870  * @vclk: wanted VCLK
    871  * @dclk: wanted DCLK
    872  * @vco_min: minimum VCO frequency
    873  * @vco_max: maximum VCO frequency
    874  * @fb_factor: factor to multiply vco freq with
    875  * @fb_mask: limit and bitmask for feedback divider
    876  * @pd_min: post divider minimum
    877  * @pd_max: post divider maximum
    878  * @pd_even: post divider must be even above this value
    879  * @optimal_fb_div: resulting feedback divider
    880  * @optimal_vclk_div: resulting vclk post divider
    881  * @optimal_dclk_div: resulting dclk post divider
    882  *
    883  * Calculate dividers for UVD's UPLL (R6xx-SI, except APUs).
    884  * Returns zero on success, -EINVAL on error.
    885  */
    886 int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
    887 				  unsigned vclk, unsigned dclk,
    888 				  unsigned vco_min, unsigned vco_max,
    889 				  unsigned fb_factor, unsigned fb_mask,
    890 				  unsigned pd_min, unsigned pd_max,
    891 				  unsigned pd_even,
    892 				  unsigned *optimal_fb_div,
    893 				  unsigned *optimal_vclk_div,
    894 				  unsigned *optimal_dclk_div)
    895 {
    896 	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;
    897 
    898 	/* start off with something large */
    899 	unsigned optimal_score = ~0;
    900 
    901 	/* loop through vco from low to high */
    902 	vco_min = max(max(vco_min, vclk), dclk);
    903 	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
    904 
    905 		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
    906 		unsigned vclk_div, dclk_div, score;
    907 
    908 		do_div(fb_div, ref_freq);
    909 
    910 		/* fb div out of range ? */
    911 		if (fb_div > fb_mask)
    912 			break; /* it can only get worse */
    913 
    914 		fb_div &= fb_mask;
    915 
    916 		/* calc vclk divider with current vco freq */
    917 		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
    918 							 pd_min, pd_even);
    919 		if (vclk_div > pd_max)
    920 			break; /* vco is too big, it has to stop */
    921 
    922 		/* calc dclk divider with current vco freq */
    923 		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
    924 							 pd_min, pd_even);
    925 		if (dclk_div > pd_max)
    926 			break; /* vco is too big, it has to stop */
    927 
    928 		/* calc score with current vco freq */
    929 		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);
    930 
    931 		/* determine if this vco setting is better than current optimal settings */
    932 		if (score < optimal_score) {
    933 			*optimal_fb_div = fb_div;
    934 			*optimal_vclk_div = vclk_div;
    935 			*optimal_dclk_div = dclk_div;
    936 			optimal_score = score;
    937 			if (optimal_score == 0)
    938 				break; /* it can't get better than this */
    939 		}
    940 	}
    941 
    942 	/* did we find a valid setup? */
    943 	if (optimal_score == ~0)
    944 		return -EINVAL;
    945 
    946 	return 0;
    947 }
    948 
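        /**
         * radeon_uvd_send_upll_ctlreq - issue the UPLL control request
         *
         * @rdev: radeon_device pointer
         * @cg_upll_func_cntl: register offset of CG_UPLL_FUNC_CNTL
         *
         * Toggle UPLL_CTLREQ and wait for the hardware to acknowledge the new
         * clock setup.  Returns -ETIMEDOUT if no ack arrives.
         */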
    949 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
    950 				unsigned cg_upll_func_cntl)
    951 {
    952 	unsigned i;
    953 
    954 	/* make sure UPLL_CTLREQ is deasserted */
    955 	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
    956 
    957 	mdelay(10);
    958 
    959 	/* assert UPLL_CTLREQ */
    960 	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
    961 
    962 	/* wait for CTLACK and CTLACK2 to get asserted */
    963 	for (i = 0; i < 100; ++i) {
    964 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
    965 		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
    966 			break;
    967 		mdelay(10);
    968 	}
    969 
    970 	/* deassert UPLL_CTLREQ */
    971 	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
    972 
    973 	if (i == 100) {
    974 		DRM_ERROR("Timeout setting UVD clocks!\n");
    975 		return -ETIMEDOUT;
    976 	}
    977 
    978 	return 0;
    979 }
    980