/*	$NetBSD: amdgpu_vce.c,v 1.3 2018/08/27 14:04:50 riastradh Exp $	*/

/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_vce.c,v 1.3 2018/08/27 14:04:50 riastradh Exp $");

#include <linux/firmware.h>
#include <linux/module.h>
#include <asm/byteorder.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
#include "cikd.h"

/* 1 second timeout */
#define VCE_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"radeon/bonaire_vce.bin"
#define FIRMWARE_KABINI 	"radeon/kabini_vce.bin"
#define FIRMWARE_KAVERI 	"radeon/kaveri_vce.bin"
#define FIRMWARE_HAWAII 	"radeon/hawaii_vce.bin"
#define FIRMWARE_MULLINS	"radeon/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);

static void amdgpu_vce_idle_work_handler(struct work_struct *work);

/**
 * amdgpu_vce_sw_init - allocate memory, load vce firmware
 *
 * @adev: amdgpu_device pointer
 * @size: size of the firmware, stack and heap BO to allocate
 *
 * First step to get VCE online, allocate memory and load the firmware
 */
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned ucode_version, version_major, version_minor, binary_id;
	int i, r;

	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;

	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vce.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vce.fw);
		adev->vce.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;

	ucode_version = le32_to_cpu(hdr->ucode_version);
	version_major = (ucode_version >> 20) & 0xfff;
	version_minor = (ucode_version >> 8) & 0xfff;
	binary_id = ucode_version & 0xff;
	DRM_INFO("Found VCE firmware Version: %u.%u Binary ID: %u\n",
		version_major, version_minor, binary_id);
	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
				(binary_id << 8));
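	/*
	 * fw_version packs major/minor/binary ID into one word, e.g.
	 * firmware 52.0 with binary ID 3 yields 0x34000300, so the
	 * "fw_version >> 24 >= 52" checks elsewhere in this file compare
	 * against the major version.
	 */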

	/* allocate firmware, stack and heap BO */

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &adev->vce.vcpu_bo);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
		return r;
	}

	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
	if (r) {
		amdgpu_bo_unref(&adev->vce.vcpu_bo);
		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
		return r;
	}

	r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
			  &adev->vce.gpu_addr);
	amdgpu_bo_unreserve(adev->vce.vcpu_bo);
	if (r) {
		amdgpu_bo_unref(&adev->vce.vcpu_bo);
		dev_err(adev->dev, "(%d) VCE bo pin failed\n", r);
		return r;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		atomic_set(&adev->vce.handles[i], 0);
		adev->vce.filp[i] = NULL;
	}

	return 0;
}
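
/*
 * Illustrative only: a hypothetical bringup sequence using this file's
 * entry points, roughly as an ASIC-specific VCE IP block might arrange
 * it (the real call sites live outside this file):
 *
 *	r = amdgpu_vce_sw_init(adev, bo_size);	// load fw, pin VCPU BO
 *	if (r == 0)
 *		r = amdgpu_vce_resume(adev);	// copy fw image into the BO
 *	...
 *	amdgpu_vce_sw_fini(adev);		// teardown: rings, fw, BO
 */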

/**
 * amdgpu_vce_sw_fini - free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Last step on VCE teardown, free firmware memory
 */
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
	if (adev->vce.vcpu_bo == NULL)
		return 0;

	amdgpu_bo_unref(&adev->vce.vcpu_bo);

	amdgpu_ring_fini(&adev->vce.ring[0]);
	amdgpu_ring_fini(&adev->vce.ring[1]);

	release_firmware(adev->vce.fw);

	return 0;
}

/**
 * amdgpu_vce_suspend - unpin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_suspend(struct amdgpu_device *adev)
{
	int i;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (atomic_read(&adev->vce.handles[i]))
			break;

	if (i == AMDGPU_MAX_VCE_HANDLES)
		return 0;

	cancel_delayed_work_sync(&adev->vce.idle_work);
	/* TODO: suspending running encoding sessions isn't supported */
	return -EINVAL;
}

/**
 * amdgpu_vce_resume - pin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
	void *cpu_addr;
	const struct common_firmware_header *hdr;
	unsigned offset;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
		dev_err(adev->dev, "(%d) VCE map failed\n", r);
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
	memcpy(cpu_addr, (adev->vce.fw->data) + offset,
		(adev->vce.fw->size) - offset);

	amdgpu_bo_kunmap(adev->vce.vcpu_bo);

	amdgpu_bo_unreserve(adev->vce.vcpu_bo);

	return 0;
}

/**
 * amdgpu_vce_idle_work_handler - power off VCE
 *
 * @work: pointer to work structure
 *
 * Power off VCE when it's not used anymore
 */
static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vce.idle_work.work);

	if ((amdgpu_fence_count_emitted(&adev->vce.ring[0]) == 0) &&
	    (amdgpu_fence_count_emitted(&adev->vce.ring[1]) == 0)) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, false);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 0, 0);
			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							    AMD_PG_STATE_GATE);
			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							    AMD_CG_STATE_GATE);
		}
	} else {
		schedule_delayed_work(&adev->vce.idle_work,
				      msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
	}
}

/**
 * amdgpu_vce_note_usage - power up VCE
 *
 * @adev: amdgpu_device pointer
 *
 * Make sure VCE is powered up when we want to use it
 */
static void amdgpu_vce_note_usage(struct amdgpu_device *adev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
	set_clocks &= schedule_delayed_work(&adev->vce.idle_work,
					    msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));

	if (adev->pm.dpm_enabled) {
		/* XXX figure out if the streams changed */
		streams_changed = false;
	}

	if (set_clocks || streams_changed) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, true);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							    AMD_CG_STATE_UNGATE);
			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							    AMD_PG_STATE_UNGATE);
		}
	}
}
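
/*
 * Note on the idle-work idiom above: cancel_delayed_work_sync() returns
 * true while the idle work is still pending, i.e. VCE has not been
 * powered down yet, so set_clocks stays false and only the 1-second
 * idle timer is re-armed.  If the idle handler already ran (or was
 * never queued), set_clocks becomes true and the clocks and power/clock
 * gates are brought back up before the hardware is used.
 */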

/**
 * amdgpu_vce_free_handles - free still open VCE handles
 *
 * @adev: amdgpu_device pointer
 * @filp: drm file pointer
 *
 * Close all VCE handles still open by this file pointer
 */
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->vce.ring[0];
	int i, r;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->vce.handles[i]);
		if (!handle || adev->vce.filp[i] != filp)
			continue;

		amdgpu_vce_note_usage(adev);

		r = amdgpu_vce_get_destroy_msg(ring, handle, NULL);
		if (r)
			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);

		adev->vce.filp[i] = NULL;
		atomic_set(&adev->vce.handles[i], 0);
	}
}

static int amdgpu_vce_free_job(struct amdgpu_job *job)
{
	amdgpu_ib_free(job->adev, job->ibs);
	kfree(job->ibs);
	return 0;
}

/**
 * amdgpu_vce_get_create_msg - generate a VCE create msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @fence: optional fence to return
 *
 * Open up a stream for HW test
 */
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
			      struct fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_ib *ib = NULL;
	struct fence *f = NULL;
	struct amdgpu_device *adev = ring->adev;
	uint64_t dummy;
	int i, r;

	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!ib)
		return -ENOMEM;
	r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		kfree(ib);
		return r;
	}

	dummy = ib->gpu_addr + 1024;

	/* stitch together a VCE create msg */
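	/*
	 * Each VCE command packet below is laid out as
	 * [length in bytes][command][payload dwords...]; the length
	 * covers the whole packet including the two header dwords
	 * (0x0000000c = 12 bytes = the 3-dword session packet).
	 */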
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	if ((ring->adev->vce.fw_version >> 24) >= 52)
		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
	else
		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000042;
	ib->ptr[ib->length_dw++] = 0x0000000a;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = 0x00000080;
	ib->ptr[ib->length_dw++] = 0x00000060;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x0000000c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	if ((ring->adev->vce.fw_version >> 24) >= 52) {
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
	}

	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x00000001;

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
						 &amdgpu_vce_free_job,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r)
		goto err;
	if (fence)
		*fence = fence_get(f);
	fence_put(f);
	if (amdgpu_enable_scheduler)
		return 0;
err:
	amdgpu_ib_free(adev, ib);
	kfree(ib);
	return r;
}

/**
 * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so
 */
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
			       struct fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_ib *ib = NULL;
	struct fence *f = NULL;
	struct amdgpu_device *adev = ring->adev;
	uint64_t dummy;
	int i, r;

	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!ib)
		return -ENOMEM;

	r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
	if (r) {
		kfree(ib);
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	dummy = ib->gpu_addr + 1024;

	/* stitch together a VCE destroy msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x00000001;

	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
						 &amdgpu_vce_free_job,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r)
		goto err;
	if (fence)
		*fence = fence_get(f);
	fence_put(f);
	if (amdgpu_enable_scheduler)
		return 0;
err:
	amdgpu_ib_free(adev, ib);
	kfree(ib);
	return r;
}

/**
 * amdgpu_vce_cs_reloc - command submission relocation
 *
 * @p: parser context
 * @ib_idx: index of the IB to patch
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb buffer index (0xffffffff is treated as 0)
 *
 * Patch relocation inside command stream with real buffer address
 */
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
			       int lo, int hi, unsigned size, uint32_t index)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_ib *ib = &p->ibs[ib_idx];
	struct amdgpu_bo *bo;
	uint64_t addr;

	if (index == 0xffffffff)
		index = 0;

	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
	addr += ((uint64_t)size) * ((uint64_t)index);

	mapping = amdgpu_cs_find_mapping(p, addr, &bo);
	if (mapping == NULL) {
		DRM_ERROR("Can't find BO for addr 0x%010"PRIx64" %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return -EINVAL;
	}

	if ((addr + (uint64_t)size) >
	    ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("BO too small for addr 0x%010"PRIx64" %d %d\n",
			  addr, lo, hi);
		return -EINVAL;
	}

	addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
	addr += amdgpu_bo_gpu_offset(bo);
	addr -= ((uint64_t)size) * ((uint64_t)index);

	ib->ptr[lo] = addr & 0xFFFFFFFF;
	ib->ptr[hi] = addr >> 32;

	return 0;
}
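
/*
 * Worked example for the arithmetic above (numbers are hypothetical):
 * if userspace wrote VM address 0x100000 into lo/hi with index 0, the
 * mapping starts at GPU page it.start = 0x100 (0x100000 >> 12), and the
 * BO actually sits at offset 0x8000000, the patched address becomes
 * 0x100000 - 0x100 * 4096 + 0x8000000 = 0x8000000: the offset into the
 * mapping is rebased onto the BO's real placement.
 */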

/**
 * amdgpu_vce_validate_handle - validate stream handle
 *
 * @p: parser context
 * @handle: handle to validate
 * @allocated: allocated a new handle?
 *
 * Validates the handle and returns the found session index, or -EINVAL
 * if we don't have another free session index.
 */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
				      uint32_t handle, bool *allocated)
{
	unsigned i;

	*allocated = false;

	/* validate the handle */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
			if (p->adev->vce.filp[i] != p->filp) {
				DRM_ERROR("VCE handle collision detected!\n");
				return -EINVAL;
			}
			return i;
		}
	}

	/* handle not found, try to allocate a new one */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
			p->adev->vce.filp[i] = p->filp;
			p->adev->vce.img_size[i] = 0;
			*allocated = true;
			return i;
		}
	}

	DRM_ERROR("No more free VCE handles!\n");
	return -EINVAL;
}
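
/*
 * The lock-free claim above relies on atomic_cmpxchg() returning the
 * old value: it is 0 only when the slot was free and has just been
 * claimed, so concurrent submitters cannot grab the same session index.
 */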

/**
 * amdgpu_vce_ring_parse_cs - parse and validate the command stream
 *
 * @p: parser context
 * @ib_idx: index of the IB to parse
 *
 */
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->ibs[ib_idx];
	unsigned fb_idx = 0, bs_idx = 0;
	int session_idx = -1;
	bool destroyed = false;
	bool created = false;
	bool allocated = false;
	uint32_t tmp, handle = 0;
	uint32_t *size = &tmp;
	int i, r = 0, idx = 0;

	amdgpu_vce_note_usage(p->adev);

	while (idx < ib->length_dw) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		if (destroyed) {
			DRM_ERROR("No other command allowed after destroy!\n");
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000001: // session
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0)
				return session_idx;
			size = &p->adev->vce.img_size[session_idx];
			break;

		case 0x00000002: // task info
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x01000001: // create
			created = true;
			if (!allocated) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
				amdgpu_get_ib_value(p, ib_idx, idx + 10) *
				8 * 3 / 2;
			break;

		case 0x04000001: // config extension
		case 0x04000002: // pic control
		case 0x04000005: // rate control
		case 0x04000007: // motion estimation
		case 0x04000008: // rdo
		case 0x04000009: // vui
		case 0x05000002: // auxiliary buffer
			break;

		case 0x03000001: // encode
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
						*size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
						*size / 3, 0);
			if (r)
				goto out;
			break;

		case 0x02000001: // destroy
			destroyed = true;
			break;

		case 0x05000001: // context buffer
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						*size * 2, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: // video bitstream buffer
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: // feedback buffer
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						4096, fb_idx);
			if (r)
				goto out;
			break;

		default:
			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
			r = -EINVAL;
			goto out;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated && !created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if ((!r && destroyed) || (r && allocated)) {
		/*
		 * IB contains a destroy msg or we have allocated a
		 * handle and got an error, anyway free the handle
		 */
		for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
			atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
	}

	return r;
}
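
/*
 * A minimal IB that passes this parser mirrors what
 * amdgpu_vce_get_create_msg() builds (values in dwords):
 *
 *	0x0000000c 0x00000001 <handle>			session
 *	0x00000030/0x40 0x01000001 ...			create; dwords at
 *							+8 and +10 set the
 *							per-session image size
 *	0x00000014 0x05000005 <hi> <lo> 0x00000001	feedback buffer
 *
 * Every buffer command is rewritten through amdgpu_vce_cs_reloc()
 * before the IB is handed to the hardware.
 */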

/**
 * amdgpu_vce_ring_emit_semaphore - emit a semaphore command
 *
 * @ring: engine to use
 * @semaphore: address of semaphore
 * @emit_wait: true=emit wait, false=emit signal
 *
 */
bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
				    struct amdgpu_semaphore *semaphore,
				    bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_SEMAPHORE);
	amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
	amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
	amdgpu_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0));
	if (!emit_wait)
		amdgpu_ring_write(ring, VCE_CMD_END);

	return true;
}
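
/*
 * The semaphore address above is split into two 20-bit fields: bits
 * 3..22 go in the first dword and bits 23..42 in the second, so the
 * address must be 8-byte aligned and fit in 43 bits.
 */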

/**
 * amdgpu_vce_ring_emit_ib - execute indirect buffer
 *
 * @ring: engine to use
 * @ib: the IB to execute
 *
 */
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	amdgpu_ring_write(ring, VCE_CMD_IB);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

/**
 * amdgpu_vce_ring_emit_fence - add a fence command to the ring
 *
 * @ring: engine to use
 * @addr: GPU address to write the sequence number to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_* flags (64-bit sequence numbers unsupported)
 *
 */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

/**
 * amdgpu_vce_ring_test_ring - test if VCE ring is working
 *
 * @ring: the engine to test on
 *
 */
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr = amdgpu_ring_get_rptr(ring);
	unsigned i;
	int r;

	r = amdgpu_ring_lock(ring, 16);
	if (r) {
		DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring, VCE_CMD_END);
	amdgpu_ring_unlock_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed\n",
			  ring->idx);
		r = -ETIMEDOUT;
	}

	return r;
}

/**
 * amdgpu_vce_ring_test_ib - test if VCE IBs are working
 *
 * @ring: the engine to test on
 *
 */
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
{
	struct fence *fence = NULL;
	int r;

	/* skip vce ring1 ib test for now, since it's not reliable */
	if (ring == &ring->adev->vce.ring[1])
		return 0;

	r = amdgpu_vce_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
		goto error;
	}

	r = amdgpu_vce_get_destroy_msg(ring, 1, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
		goto error;
	}

	r = fence_wait(fence, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
	} else {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
	}
error:
	fence_put(fence);
	return r;
}