Home | History | Annotate | Line # | Download | only in radeon
      1 /*	$NetBSD: radeon_vce.c,v 1.5 2021/12/18 23:45:43 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2013 Advanced Micro Devices, Inc.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * The above copyright notice and this permission notice (including the
     24  * next paragraph) shall be included in all copies or substantial portions
     25  * of the Software.
     26  *
     27  * Authors: Christian König <christian.koenig (at) amd.com>
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: radeon_vce.c,v 1.5 2021/12/18 23:45:43 riastradh Exp $");
     32 
     33 #include <linux/firmware.h>
     34 #include <linux/module.h>
     35 
     36 #include <drm/drm.h>
     37 
     38 #include "radeon.h"
     39 #include "radeon_asic.h"
     40 #include "sid.h"
     41 
     42 /* 1 second timeout */
     43 #define VCE_IDLE_TIMEOUT_MS	1000
     44 
     45 /* Firmware Names */
     46 #define FIRMWARE_TAHITI	"radeon/TAHITI_vce.bin"
     47 #define FIRMWARE_BONAIRE	"radeon/BONAIRE_vce.bin"
     48 
     49 MODULE_FIRMWARE(FIRMWARE_TAHITI);
     50 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
     51 
     52 static void radeon_vce_idle_work_handler(struct work_struct *work);
     53 
     54 #ifdef __NetBSD__		/* XXX Ugh!  */
/*
 * scan_2dec_uint(sp, delim, uintp)
 *
 *	Parse one or two decimal digits at *sp that are immediately
 *	followed by delim.  On success, store the value in *uintp, leave
 *	*sp pointing just past the delimiter, and return true.  On
 *	failure, return false and leave *uintp untouched; *sp may already
 *	have been advanced past the characters that were examined.
 */
static bool
scan_2dec_uint(const char **sp, char delim, unsigned int *uintp)
{
	unsigned int value = 0;
	unsigned int ndigits;

	for (ndigits = 0; ndigits < 2; ndigits++) {
		const char ch = **sp;

		++*sp;
		if (!isdigit((unsigned char)ch))
			return false;
		value = 10 * value + (unsigned int)(ch - '0');

		/* a delimiter right after the first digit ends the number */
		if (**sp == delim)
			break;
	}

	/* anything but the delimiter here is a third digit or junk */
	if (**sp != delim)
		return false;

	++*sp;
	*uintp = value;
	return true;
}
     78 
/*
 * scan_2dec_u8(sp, delim, u8p)
 *
 *	Same as scan_2dec_uint(), but store the parsed value, narrowed
 *	to uint8_t, in *u8p.  *u8p is written only on success.
 */
static bool
scan_2dec_u8(const char **sp, char delim, uint8_t *u8p)
{
	unsigned int parsed;
	const bool ok = scan_2dec_uint(sp, delim, &parsed);

	if (ok)
		*u8p = (uint8_t)parsed;

	return ok;
}
     90 #endif
     91 
     92 /**
     93  * radeon_vce_init - allocate memory, load vce firmware
     94  *
     95  * @rdev: radeon_device pointer
     96  *
     97  * First step to get VCE online, allocate memory and load the firmware
     98  */
     99 int radeon_vce_init(struct radeon_device *rdev)
    100 {
    101 	static const char *fw_version = "[ATI LIB=VCEFW,";
    102 	static const char *fb_version = "[ATI LIB=VCEFWSTATS,";
    103 	unsigned long size;
    104 	const char *fw_name, *c;
    105 	uint8_t start, mid, end;
    106 	int i, r;
    107 
    108 	INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler);
    109 
    110 	switch (rdev->family) {
    111 	case CHIP_TAHITI:
    112 	case CHIP_PITCAIRN:
    113 	case CHIP_VERDE:
    114 	case CHIP_OLAND:
    115 	case CHIP_ARUBA:
    116 		fw_name = FIRMWARE_TAHITI;
    117 		break;
    118 
    119 	case CHIP_BONAIRE:
    120 	case CHIP_KAVERI:
    121 	case CHIP_KABINI:
    122 	case CHIP_HAWAII:
    123 	case CHIP_MULLINS:
    124 		fw_name = FIRMWARE_BONAIRE;
    125 		break;
    126 
    127 	default:
    128 		return -EINVAL;
    129 	}
    130 
    131 	r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev);
    132 	if (r) {
    133 		dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n",
    134 			fw_name);
    135 		return r;
    136 	}
    137 
    138 	/* search for firmware version */
    139 
    140 	size = rdev->vce_fw->size - strlen(fw_version) - 9;
    141 	c = rdev->vce_fw->data;
    142 	for (;size > 0; --size, ++c)
    143 		if (strncmp(c, fw_version, strlen(fw_version)) == 0)
    144 			break;
    145 
    146 	if (size == 0)
    147 		return -EINVAL;
    148 
    149 	c += strlen(fw_version);
    150 #ifdef __NetBSD__
    151 	if (!scan_2dec_u8(&c, '.', &start))
    152 		return -EINVAL;
    153 	if (!scan_2dec_u8(&c, '.', &mid))
    154 		return -EINVAL;
    155 	if (!scan_2dec_u8(&c, ']', &end))
    156 		return -EINVAL;
    157 #else
    158 	if (sscanf(c, "%2hhd.%2hhd.%2hhd]", &start, &mid, &end) != 3)
    159 		return -EINVAL;
    160 #endif
    161 
    162 	/* search for feedback version */
    163 
    164 	size = rdev->vce_fw->size - strlen(fb_version) - 3;
    165 	c = rdev->vce_fw->data;
    166 	for (;size > 0; --size, ++c)
    167 		if (strncmp(c, fb_version, strlen(fb_version)) == 0)
    168 			break;
    169 
    170 	if (size == 0)
    171 		return -EINVAL;
    172 
    173 	c += strlen(fb_version);
    174 #ifdef __NetBSD__
    175 	if (!scan_2dec_uint(&c, ']', &rdev->vce.fb_version))
    176 		return -EINVAL;
    177 #else
    178 	if (sscanf(c, "%2u]", &rdev->vce.fb_version) != 1)
    179 		return -EINVAL;
    180 #endif
    181 
    182 	DRM_INFO("Found VCE firmware/feedback version %hhd.%hhd.%hhd / %d!\n",
    183 		 start, mid, end, rdev->vce.fb_version);
    184 
    185 	rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8);
    186 
    187 	/* we can only work with this fw version for now */
    188 	if ((rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) &&
    189 	    (rdev->vce.fw_version != ((50 << 24) | (0 << 16) | (1 << 8))) &&
    190 	    (rdev->vce.fw_version != ((50 << 24) | (1 << 16) | (2 << 8))))
    191 		return -EINVAL;
    192 
    193 	/* allocate firmware, stack and heap BO */
    194 
    195 	if (rdev->family < CHIP_BONAIRE)
    196 		size = vce_v1_0_bo_size(rdev);
    197 	else
    198 		size = vce_v2_0_bo_size(rdev);
    199 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
    200 			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
    201 			     &rdev->vce.vcpu_bo);
    202 	if (r) {
    203 		dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
    204 		return r;
    205 	}
    206 
    207 	r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
    208 	if (r) {
    209 		radeon_bo_unref(&rdev->vce.vcpu_bo);
    210 		dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r);
    211 		return r;
    212 	}
    213 
    214 	r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
    215 			  &rdev->vce.gpu_addr);
    216 	radeon_bo_unreserve(rdev->vce.vcpu_bo);
    217 	if (r) {
    218 		radeon_bo_unref(&rdev->vce.vcpu_bo);
    219 		dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r);
    220 		return r;
    221 	}
    222 
    223 	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
    224 		atomic_set(&rdev->vce.handles[i], 0);
    225 		rdev->vce.filp[i] = NULL;
    226 	}
    227 
    228 	return 0;
    229 }
    230 
    231 /**
    232  * radeon_vce_fini - free memory
    233  *
    234  * @rdev: radeon_device pointer
    235  *
    236  * Last step on VCE teardown, free firmware memory
    237  */
    238 void radeon_vce_fini(struct radeon_device *rdev)
    239 {
    240 	if (rdev->vce.vcpu_bo == NULL)
    241 		return;
    242 
    243 	radeon_bo_unref(&rdev->vce.vcpu_bo);
    244 
    245 	release_firmware(rdev->vce_fw);
    246 }
    247 
    248 /**
    249  * radeon_vce_suspend - unpin VCE fw memory
    250  *
    251  * @rdev: radeon_device pointer
    252  *
    253  */
    254 int radeon_vce_suspend(struct radeon_device *rdev)
    255 {
    256 	int i;
    257 
    258 	if (rdev->vce.vcpu_bo == NULL)
    259 		return 0;
    260 
    261 	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i)
    262 		if (atomic_read(&rdev->vce.handles[i]))
    263 			break;
    264 
    265 	if (i == RADEON_MAX_VCE_HANDLES)
    266 		return 0;
    267 
    268 	/* TODO: suspending running encoding sessions isn't supported */
    269 	return -EINVAL;
    270 }
    271 
    272 /**
    273  * radeon_vce_resume - pin VCE fw memory
    274  *
    275  * @rdev: radeon_device pointer
    276  *
    277  */
    278 int radeon_vce_resume(struct radeon_device *rdev)
    279 {
    280 	void *cpu_addr;
    281 	int r;
    282 
    283 	if (rdev->vce.vcpu_bo == NULL)
    284 		return -EINVAL;
    285 
    286 	r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
    287 	if (r) {
    288 		dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r);
    289 		return r;
    290 	}
    291 
    292 	r = radeon_bo_kmap(rdev->vce.vcpu_bo, &cpu_addr);
    293 	if (r) {
    294 		radeon_bo_unreserve(rdev->vce.vcpu_bo);
    295 		dev_err(rdev->dev, "(%d) VCE map failed\n", r);
    296 		return r;
    297 	}
    298 
    299 	memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
    300 	if (rdev->family < CHIP_BONAIRE)
    301 		r = vce_v1_0_load_fw(rdev, cpu_addr);
    302 	else
    303 		memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
    304 
    305 	radeon_bo_kunmap(rdev->vce.vcpu_bo);
    306 
    307 	radeon_bo_unreserve(rdev->vce.vcpu_bo);
    308 
    309 	return r;
    310 }
    311 
    312 /**
    313  * radeon_vce_idle_work_handler - power off VCE
    314  *
    315  * @work: pointer to work structure
    316  *
    317  * power of VCE when it's not used any more
    318  */
    319 static void radeon_vce_idle_work_handler(struct work_struct *work)
    320 {
    321 	struct radeon_device *rdev =
    322 		container_of(work, struct radeon_device, vce.idle_work.work);
    323 
    324 	if ((radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE1_INDEX) == 0) &&
    325 	    (radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE2_INDEX) == 0)) {
    326 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
    327 			radeon_dpm_enable_vce(rdev, false);
    328 		} else {
    329 			radeon_set_vce_clocks(rdev, 0, 0);
    330 		}
    331 	} else {
    332 		schedule_delayed_work(&rdev->vce.idle_work,
    333 				      msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
    334 	}
    335 }
    336 
    337 /**
    338  * radeon_vce_note_usage - power up VCE
    339  *
    340  * @rdev: radeon_device pointer
    341  *
    342  * Make sure VCE is powerd up when we want to use it
    343  */
    344 void radeon_vce_note_usage(struct radeon_device *rdev)
    345 {
    346 	bool streams_changed = false;
    347 	bool set_clocks = !cancel_delayed_work_sync(&rdev->vce.idle_work);
    348 	set_clocks &= schedule_delayed_work(&rdev->vce.idle_work,
    349 					    msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
    350 
    351 	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
    352 		/* XXX figure out if the streams changed */
    353 		streams_changed = false;
    354 	}
    355 
    356 	if (set_clocks || streams_changed) {
    357 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
    358 			radeon_dpm_enable_vce(rdev, true);
    359 		} else {
    360 			radeon_set_vce_clocks(rdev, 53300, 40000);
    361 		}
    362 	}
    363 }
    364 
    365 /**
    366  * radeon_vce_free_handles - free still open VCE handles
    367  *
    368  * @rdev: radeon_device pointer
    369  * @filp: drm file pointer
    370  *
    371  * Close all VCE handles still open by this file pointer
    372  */
    373 void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp)
    374 {
    375 	int i, r;
    376 	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
    377 		uint32_t handle = atomic_read(&rdev->vce.handles[i]);
    378 		if (!handle || rdev->vce.filp[i] != filp)
    379 			continue;
    380 
    381 		radeon_vce_note_usage(rdev);
    382 
    383 		r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX,
    384 					       handle, NULL);
    385 		if (r)
    386 			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
    387 
    388 		rdev->vce.filp[i] = NULL;
    389 		atomic_set(&rdev->vce.handles[i], 0);
    390 	}
    391 }
    392 
    393 /**
    394  * radeon_vce_get_create_msg - generate a VCE create msg
    395  *
    396  * @rdev: radeon_device pointer
    397  * @ring: ring we should submit the msg to
    398  * @handle: VCE session handle to use
    399  * @fence: optional fence to return
    400  *
    401  * Open up a stream for HW test
    402  */
    403 int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
    404 			      uint32_t handle, struct radeon_fence **fence)
    405 {
    406 	const unsigned ib_size_dw = 1024;
    407 	struct radeon_ib ib;
    408 	uint64_t dummy;
    409 	int i, r;
    410 
    411 	r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4);
    412 	if (r) {
    413 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
    414 		return r;
    415 	}
    416 
    417 	dummy = ib.gpu_addr + 1024;
    418 
    419 	/* stitch together an VCE create msg */
    420 	ib.length_dw = 0;
    421 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */
    422 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */
    423 	ib.ptr[ib.length_dw++] = cpu_to_le32(handle);
    424 
    425 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000030); /* len */
    426 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x01000001); /* create cmd */
    427 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000);
    428 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000042);
    429 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000a);
    430 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
    431 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000080);
    432 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000060);
    433 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100);
    434 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100);
    435 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c);
    436 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000);
    437 
    438 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */
    439 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */
    440 	ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy));
    441 	ib.ptr[ib.length_dw++] = cpu_to_le32(dummy);
    442 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
    443 
    444 	for (i = ib.length_dw; i < ib_size_dw; ++i)
    445 		ib.ptr[i] = cpu_to_le32(0x0);
    446 
    447 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
    448 	if (r)
    449 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
    450 
    451 
    452 	if (fence)
    453 		*fence = radeon_fence_ref(ib.fence);
    454 
    455 	radeon_ib_free(rdev, &ib);
    456 
    457 	return r;
    458 }
    459 
    460 /**
    461  * radeon_vce_get_destroy_msg - generate a VCE destroy msg
    462  *
    463  * @rdev: radeon_device pointer
    464  * @ring: ring we should submit the msg to
    465  * @handle: VCE session handle to use
    466  * @fence: optional fence to return
    467  *
    468  * Close up a stream for HW test or if userspace failed to do so
    469  */
    470 int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
    471 			       uint32_t handle, struct radeon_fence **fence)
    472 {
    473 	const unsigned ib_size_dw = 1024;
    474 	struct radeon_ib ib;
    475 	uint64_t dummy;
    476 	int i, r;
    477 
    478 	r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4);
    479 	if (r) {
    480 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
    481 		return r;
    482 	}
    483 
    484 	dummy = ib.gpu_addr + 1024;
    485 
    486 	/* stitch together an VCE destroy msg */
    487 	ib.length_dw = 0;
    488 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */
    489 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */
    490 	ib.ptr[ib.length_dw++] = cpu_to_le32(handle);
    491 
    492 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */
    493 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */
    494 	ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy));
    495 	ib.ptr[ib.length_dw++] = cpu_to_le32(dummy);
    496 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
    497 
    498 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000008); /* len */
    499 	ib.ptr[ib.length_dw++] = cpu_to_le32(0x02000001); /* destroy cmd */
    500 
    501 	for (i = ib.length_dw; i < ib_size_dw; ++i)
    502 		ib.ptr[i] = cpu_to_le32(0x0);
    503 
    504 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
    505 	if (r) {
    506 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
    507 	}
    508 
    509 	if (fence)
    510 		*fence = radeon_fence_ref(ib.fence);
    511 
    512 	radeon_ib_free(rdev, &ib);
    513 
    514 	return r;
    515 }
    516 
    517 /**
    518  * radeon_vce_cs_reloc - command submission relocation
    519  *
    520  * @p: parser context
    521  * @lo: address of lower dword
    522  * @hi: address of higher dword
    523  * @size: size of checker for relocation buffer
    524  *
    525  * Patch relocation inside command stream with real buffer address
    526  */
    527 int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi,
    528 			unsigned size)
    529 {
    530 	struct radeon_cs_chunk *relocs_chunk;
    531 	struct radeon_bo_list *reloc;
    532 	uint64_t start, end, offset;
    533 	unsigned idx;
    534 
    535 	relocs_chunk = p->chunk_relocs;
    536 	offset = radeon_get_ib_value(p, lo);
    537 	idx = radeon_get_ib_value(p, hi);
    538 
    539 	if (idx >= relocs_chunk->length_dw) {
    540 		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
    541 			  idx, relocs_chunk->length_dw);
    542 		return -EINVAL;
    543 	}
    544 
    545 	reloc = &p->relocs[(idx / 4)];
    546 	start = reloc->gpu_offset;
    547 	end = start + radeon_bo_size(reloc->robj);
    548 	start += offset;
    549 
    550 	p->ib.ptr[lo] = start & 0xFFFFFFFF;
    551 	p->ib.ptr[hi] = start >> 32;
    552 
    553 	if (end <= start) {
    554 		DRM_ERROR("invalid reloc offset %"PRIX64"!\n", offset);
    555 		return -EINVAL;
    556 	}
    557 	if ((end - start) < size) {
    558 		DRM_ERROR("buffer to small (%d / %d)!\n",
    559 			(unsigned)(end - start), size);
    560 		return -EINVAL;
    561 	}
    562 
    563 	return 0;
    564 }
    565 
    566 /**
    567  * radeon_vce_validate_handle - validate stream handle
    568  *
    569  * @p: parser context
    570  * @handle: handle to validate
    571  * @allocated: allocated a new handle?
    572  *
    573  * Validates the handle and return the found session index or -EINVAL
    574  * we we don't have another free session index.
    575  */
    576 static int radeon_vce_validate_handle(struct radeon_cs_parser *p,
    577 				      uint32_t handle, bool *allocated)
    578 {
    579 	unsigned i;
    580 
    581 	*allocated = false;
    582 
    583 	/* validate the handle */
    584 	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
    585 		if (atomic_read(&p->rdev->vce.handles[i]) == handle) {
    586 			if (p->rdev->vce.filp[i] != p->filp) {
    587 				DRM_ERROR("VCE handle collision detected!\n");
    588 				return -EINVAL;
    589 			}
    590 			return i;
    591 		}
    592 	}
    593 
    594 	/* handle not found try to alloc a new one */
    595 	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
    596 		if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) {
    597 			p->rdev->vce.filp[i] = p->filp;
    598 			p->rdev->vce.img_size[i] = 0;
    599 			*allocated = true;
    600 			return i;
    601 		}
    602 	}
    603 
    604 	DRM_ERROR("No more free VCE handles!\n");
    605 	return -EINVAL;
    606 }
    607 
    608 /**
    609  * radeon_vce_cs_parse - parse and validate the command stream
    610  *
    611  * @p: parser context
    612  *
    613  */
    614 int radeon_vce_cs_parse(struct radeon_cs_parser *p)
    615 {
    616 	int session_idx = -1;
    617 	bool destroyed = false, created = false, allocated = false;
    618 	uint32_t tmp, handle = 0;
    619 	uint32_t *size = &tmp;
    620 	int i, r = 0;
    621 
    622 	while (p->idx < p->chunk_ib->length_dw) {
    623 		uint32_t len = radeon_get_ib_value(p, p->idx);
    624 		uint32_t cmd = radeon_get_ib_value(p, p->idx + 1);
    625 
    626 		if ((len < 8) || (len & 3)) {
    627 			DRM_ERROR("invalid VCE command length (%d)!\n", len);
    628 			r = -EINVAL;
    629 			goto out;
    630 		}
    631 
    632 		if (destroyed) {
    633 			DRM_ERROR("No other command allowed after destroy!\n");
    634 			r = -EINVAL;
    635 			goto out;
    636 		}
    637 
    638 		switch (cmd) {
    639 		case 0x00000001: // session
    640 			handle = radeon_get_ib_value(p, p->idx + 2);
    641 			session_idx = radeon_vce_validate_handle(p, handle,
    642 								 &allocated);
    643 			if (session_idx < 0)
    644 				return session_idx;
    645 			size = &p->rdev->vce.img_size[session_idx];
    646 			break;
    647 
    648 		case 0x00000002: // task info
    649 			break;
    650 
    651 		case 0x01000001: // create
    652 			created = true;
    653 			if (!allocated) {
    654 				DRM_ERROR("Handle already in use!\n");
    655 				r = -EINVAL;
    656 				goto out;
    657 			}
    658 
    659 			*size = radeon_get_ib_value(p, p->idx + 8) *
    660 				radeon_get_ib_value(p, p->idx + 10) *
    661 				8 * 3 / 2;
    662 			break;
    663 
    664 		case 0x04000001: // config extension
    665 		case 0x04000002: // pic control
    666 		case 0x04000005: // rate control
    667 		case 0x04000007: // motion estimation
    668 		case 0x04000008: // rdo
    669 		case 0x04000009: // vui
    670 			break;
    671 
    672 		case 0x03000001: // encode
    673 			r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9,
    674 						*size);
    675 			if (r)
    676 				goto out;
    677 
    678 			r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11,
    679 						*size / 3);
    680 			if (r)
    681 				goto out;
    682 			break;
    683 
    684 		case 0x02000001: // destroy
    685 			destroyed = true;
    686 			break;
    687 
    688 		case 0x05000001: // context buffer
    689 			r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2,
    690 						*size * 2);
    691 			if (r)
    692 				goto out;
    693 			break;
    694 
    695 		case 0x05000004: // video bitstream buffer
    696 			tmp = radeon_get_ib_value(p, p->idx + 4);
    697 			r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2,
    698 						tmp);
    699 			if (r)
    700 				goto out;
    701 			break;
    702 
    703 		case 0x05000005: // feedback buffer
    704 			r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2,
    705 						4096);
    706 			if (r)
    707 				goto out;
    708 			break;
    709 
    710 		default:
    711 			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
    712 			r = -EINVAL;
    713 			goto out;
    714 		}
    715 
    716 		if (session_idx == -1) {
    717 			DRM_ERROR("no session command at start of IB\n");
    718 			r = -EINVAL;
    719 			goto out;
    720 		}
    721 
    722 		p->idx += len / 4;
    723 	}
    724 
    725 	if (allocated && !created) {
    726 		DRM_ERROR("New session without create command!\n");
    727 		r = -ENOENT;
    728 	}
    729 
    730 out:
    731 	if ((!r && destroyed) || (r && allocated)) {
    732 		/*
    733 		 * IB contains a destroy msg or we have allocated an
    734 		 * handle and got an error, anyway free the handle
    735 		 */
    736 		for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i)
    737 			atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0);
    738 	}
    739 
    740 	return r;
    741 }
    742 
    743 /**
    744  * radeon_vce_semaphore_emit - emit a semaphore command
    745  *
    746  * @rdev: radeon_device pointer
    747  * @ring: engine to use
    748  * @semaphore: address of semaphore
    749  * @emit_wait: true=emit wait, false=emit signal
    750  *
    751  */
    752 bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
    753 			       struct radeon_ring *ring,
    754 			       struct radeon_semaphore *semaphore,
    755 			       bool emit_wait)
    756 {
    757 	uint64_t addr = semaphore->gpu_addr;
    758 
    759 	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_SEMAPHORE));
    760 	radeon_ring_write(ring, cpu_to_le32((addr >> 3) & 0x000FFFFF));
    761 	radeon_ring_write(ring, cpu_to_le32((addr >> 23) & 0x000FFFFF));
    762 	radeon_ring_write(ring, cpu_to_le32(0x01003000 | (emit_wait ? 1 : 0)));
    763 	if (!emit_wait)
    764 		radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
    765 
    766 	return true;
    767 }
    768 
    769 /**
    770  * radeon_vce_ib_execute - execute indirect buffer
    771  *
    772  * @rdev: radeon_device pointer
    773  * @ib: the IB to execute
    774  *
    775  */
    776 void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
    777 {
    778 	struct radeon_ring *ring = &rdev->ring[ib->ring];
    779 	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_IB));
    780 	radeon_ring_write(ring, cpu_to_le32(ib->gpu_addr));
    781 	radeon_ring_write(ring, cpu_to_le32(upper_32_bits(ib->gpu_addr)));
    782 	radeon_ring_write(ring, cpu_to_le32(ib->length_dw));
    783 }
    784 
    785 /**
    786  * radeon_vce_fence_emit - add a fence command to the ring
    787  *
    788  * @rdev: radeon_device pointer
    789  * @fence: the fence
    790  *
    791  */
    792 void radeon_vce_fence_emit(struct radeon_device *rdev,
    793 			   struct radeon_fence *fence)
    794 {
    795 	struct radeon_ring *ring = &rdev->ring[fence->ring];
    796 	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
    797 
    798 	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_FENCE));
    799 	radeon_ring_write(ring, cpu_to_le32(addr));
    800 	radeon_ring_write(ring, cpu_to_le32(upper_32_bits(addr)));
    801 	radeon_ring_write(ring, cpu_to_le32(fence->seq));
    802 	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_TRAP));
    803 	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
    804 }
    805 
    806 /**
    807  * radeon_vce_ring_test - test if VCE ring is working
    808  *
    809  * @rdev: radeon_device pointer
    810  * @ring: the engine to test on
    811  *
    812  */
    813 int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
    814 {
    815 	uint32_t rptr = vce_v1_0_get_rptr(rdev, ring);
    816 	unsigned i;
    817 	int r;
    818 
    819 	r = radeon_ring_lock(rdev, ring, 16);
    820 	if (r) {
    821 		DRM_ERROR("radeon: vce failed to lock ring %d (%d).\n",
    822 			  ring->idx, r);
    823 		return r;
    824 	}
    825 	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
    826 	radeon_ring_unlock_commit(rdev, ring, false);
    827 
    828 	for (i = 0; i < rdev->usec_timeout; i++) {
    829 		if (vce_v1_0_get_rptr(rdev, ring) != rptr)
    830 			break;
    831 		udelay(1);
    832 	}
    833 
    834 	if (i < rdev->usec_timeout) {
    835 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
    836 			 ring->idx, i);
    837 	} else {
    838 		DRM_ERROR("radeon: ring %d test failed\n",
    839 			 ring->idx);
    840 		r = -ETIMEDOUT;
    841 	}
    842 
    843 	return r;
    844 }
    845 
    846 /**
    847  * radeon_vce_ib_test - test if VCE IBs are working
    848  *
    849  * @rdev: radeon_device pointer
    850  * @ring: the engine to test on
    851  *
    852  */
    853 int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
    854 {
    855 	struct radeon_fence *fence = NULL;
    856 	int r;
    857 
    858 	r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL);
    859 	if (r) {
    860 		DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
    861 		goto error;
    862 	}
    863 
    864 	r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence);
    865 	if (r) {
    866 		DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
    867 		goto error;
    868 	}
    869 
    870 	r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies(
    871 		RADEON_USEC_IB_TEST_TIMEOUT));
    872 	if (r < 0) {
    873 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
    874 	} else if (r == 0) {
    875 		DRM_ERROR("radeon: fence wait timed out.\n");
    876 		r = -ETIMEDOUT;
    877 	} else {
    878 		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
    879 		r = 0;
    880 	}
    881 error:
    882 	radeon_fence_unref(&fence);
    883 	return r;
    884 }
    885