Home | History | Annotate | Line # | Download | only in amdgpu
amdgpu_vce.c revision 1.1.1.2
      1 /*	$NetBSD: amdgpu_vce.c,v 1.1.1.2 2021/12/18 20:11:12 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2013 Advanced Micro Devices, Inc.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * The above copyright notice and this permission notice (including the
     24  * next paragraph) shall be included in all copies or substantial portions
     25  * of the Software.
     26  *
 * Authors: Christian König <christian.koenig (at) amd.com>
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: amdgpu_vce.c,v 1.1.1.2 2021/12/18 20:11:12 riastradh Exp $");
     32 
     33 #include <linux/firmware.h>
     34 #include <linux/module.h>
     35 
     36 #include <drm/drm.h>
     37 
     38 #include "amdgpu.h"
     39 #include "amdgpu_pm.h"
     40 #include "amdgpu_vce.h"
     41 #include "cikd.h"
     42 
     43 /* 1 second timeout */
     44 #define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)
     45 
     46 /* Firmware Names */
     47 #ifdef CONFIG_DRM_AMDGPU_CIK
     48 #define FIRMWARE_BONAIRE	"amdgpu/bonaire_vce.bin"
     49 #define FIRMWARE_KABINI	"amdgpu/kabini_vce.bin"
     50 #define FIRMWARE_KAVERI	"amdgpu/kaveri_vce.bin"
     51 #define FIRMWARE_HAWAII	"amdgpu/hawaii_vce.bin"
     52 #define FIRMWARE_MULLINS	"amdgpu/mullins_vce.bin"
     53 #endif
     54 #define FIRMWARE_TONGA		"amdgpu/tonga_vce.bin"
     55 #define FIRMWARE_CARRIZO	"amdgpu/carrizo_vce.bin"
     56 #define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
     57 #define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
     58 #define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
     59 #define FIRMWARE_POLARIS11	"amdgpu/polaris11_vce.bin"
     60 #define FIRMWARE_POLARIS12	"amdgpu/polaris12_vce.bin"
     61 #define FIRMWARE_VEGAM		"amdgpu/vegam_vce.bin"
     62 
     63 #define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
     64 #define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
     65 #define FIRMWARE_VEGA20		"amdgpu/vega20_vce.bin"
     66 
     67 #ifdef CONFIG_DRM_AMDGPU_CIK
     68 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
     69 MODULE_FIRMWARE(FIRMWARE_KABINI);
     70 MODULE_FIRMWARE(FIRMWARE_KAVERI);
     71 MODULE_FIRMWARE(FIRMWARE_HAWAII);
     72 MODULE_FIRMWARE(FIRMWARE_MULLINS);
     73 #endif
     74 MODULE_FIRMWARE(FIRMWARE_TONGA);
     75 MODULE_FIRMWARE(FIRMWARE_CARRIZO);
     76 MODULE_FIRMWARE(FIRMWARE_FIJI);
     77 MODULE_FIRMWARE(FIRMWARE_STONEY);
     78 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
     79 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
     80 MODULE_FIRMWARE(FIRMWARE_POLARIS12);
     81 MODULE_FIRMWARE(FIRMWARE_VEGAM);
     82 
     83 MODULE_FIRMWARE(FIRMWARE_VEGA10);
     84 MODULE_FIRMWARE(FIRMWARE_VEGA12);
     85 MODULE_FIRMWARE(FIRMWARE_VEGA20);
     86 
     87 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
     88 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
     89 				     struct amdgpu_bo *bo,
     90 				     struct dma_fence **fence);
     91 static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
     92 				      bool direct, struct dma_fence **fence);
     93 
     94 /**
     95  * amdgpu_vce_init - allocate memory, load vce firmware
     96  *
     97  * @adev: amdgpu_device pointer
     98  *
     99  * First step to get VCE online, allocate memory and load the firmware
    100  */
    101 int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
    102 {
    103 	const char *fw_name;
    104 	const struct common_firmware_header *hdr;
    105 	unsigned ucode_version, version_major, version_minor, binary_id;
    106 	int i, r;
    107 
    108 	switch (adev->asic_type) {
    109 #ifdef CONFIG_DRM_AMDGPU_CIK
    110 	case CHIP_BONAIRE:
    111 		fw_name = FIRMWARE_BONAIRE;
    112 		break;
    113 	case CHIP_KAVERI:
    114 		fw_name = FIRMWARE_KAVERI;
    115 		break;
    116 	case CHIP_KABINI:
    117 		fw_name = FIRMWARE_KABINI;
    118 		break;
    119 	case CHIP_HAWAII:
    120 		fw_name = FIRMWARE_HAWAII;
    121 		break;
    122 	case CHIP_MULLINS:
    123 		fw_name = FIRMWARE_MULLINS;
    124 		break;
    125 #endif
    126 	case CHIP_TONGA:
    127 		fw_name = FIRMWARE_TONGA;
    128 		break;
    129 	case CHIP_CARRIZO:
    130 		fw_name = FIRMWARE_CARRIZO;
    131 		break;
    132 	case CHIP_FIJI:
    133 		fw_name = FIRMWARE_FIJI;
    134 		break;
    135 	case CHIP_STONEY:
    136 		fw_name = FIRMWARE_STONEY;
    137 		break;
    138 	case CHIP_POLARIS10:
    139 		fw_name = FIRMWARE_POLARIS10;
    140 		break;
    141 	case CHIP_POLARIS11:
    142 		fw_name = FIRMWARE_POLARIS11;
    143 		break;
    144 	case CHIP_POLARIS12:
    145 		fw_name = FIRMWARE_POLARIS12;
    146 		break;
    147 	case CHIP_VEGAM:
    148 		fw_name = FIRMWARE_VEGAM;
    149 		break;
    150 	case CHIP_VEGA10:
    151 		fw_name = FIRMWARE_VEGA10;
    152 		break;
    153 	case CHIP_VEGA12:
    154 		fw_name = FIRMWARE_VEGA12;
    155 		break;
    156 	case CHIP_VEGA20:
    157 		fw_name = FIRMWARE_VEGA20;
    158 		break;
    159 
    160 	default:
    161 		return -EINVAL;
    162 	}
    163 
    164 	r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
    165 	if (r) {
    166 		dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
    167 			fw_name);
    168 		return r;
    169 	}
    170 
    171 	r = amdgpu_ucode_validate(adev->vce.fw);
    172 	if (r) {
    173 		dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
    174 			fw_name);
    175 		release_firmware(adev->vce.fw);
    176 		adev->vce.fw = NULL;
    177 		return r;
    178 	}
    179 
    180 	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
    181 
    182 	ucode_version = le32_to_cpu(hdr->ucode_version);
    183 	version_major = (ucode_version >> 20) & 0xfff;
    184 	version_minor = (ucode_version >> 8) & 0xfff;
    185 	binary_id = ucode_version & 0xff;
    186 	DRM_INFO("Found VCE firmware Version: %hhd.%hhd Binary ID: %hhd\n",
    187 		version_major, version_minor, binary_id);
    188 	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
    189 				(binary_id << 8));
    190 
    191 	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
    192 				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
    193 				    &adev->vce.gpu_addr, &adev->vce.cpu_addr);
    194 	if (r) {
    195 		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
    196 		return r;
    197 	}
    198 
    199 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
    200 		atomic_set(&adev->vce.handles[i], 0);
    201 		adev->vce.filp[i] = NULL;
    202 	}
    203 
    204 	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
    205 	mutex_init(&adev->vce.idle_mutex);
    206 
    207 	return 0;
    208 }
    209 
    210 /**
    211  * amdgpu_vce_fini - free memory
    212  *
    213  * @adev: amdgpu_device pointer
    214  *
    215  * Last step on VCE teardown, free firmware memory
    216  */
    217 int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
    218 {
    219 	unsigned i;
    220 
    221 	if (adev->vce.vcpu_bo == NULL)
    222 		return 0;
    223 
    224 	cancel_delayed_work_sync(&adev->vce.idle_work);
    225 	drm_sched_entity_destroy(&adev->vce.entity);
    226 
    227 	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
    228 		(void **)&adev->vce.cpu_addr);
    229 
    230 	for (i = 0; i < adev->vce.num_rings; i++)
    231 		amdgpu_ring_fini(&adev->vce.ring[i]);
    232 
    233 	release_firmware(adev->vce.fw);
    234 	mutex_destroy(&adev->vce.idle_mutex);
    235 
    236 	return 0;
    237 }
    238 
    239 /**
    240  * amdgpu_vce_entity_init - init entity
    241  *
    242  * @adev: amdgpu_device pointer
    243  *
    244  */
    245 int amdgpu_vce_entity_init(struct amdgpu_device *adev)
    246 {
    247 	struct amdgpu_ring *ring;
    248 	struct drm_gpu_scheduler *sched;
    249 	int r;
    250 
    251 	ring = &adev->vce.ring[0];
    252 	sched = &ring->sched;
    253 	r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
    254 				  &sched, 1, NULL);
    255 	if (r != 0) {
    256 		DRM_ERROR("Failed setting up VCE run queue.\n");
    257 		return r;
    258 	}
    259 
    260 	return 0;
    261 }
    262 
    263 /**
    264  * amdgpu_vce_suspend - unpin VCE fw memory
    265  *
    266  * @adev: amdgpu_device pointer
    267  *
    268  */
    269 int amdgpu_vce_suspend(struct amdgpu_device *adev)
    270 {
    271 	int i;
    272 
    273 	cancel_delayed_work_sync(&adev->vce.idle_work);
    274 
    275 	if (adev->vce.vcpu_bo == NULL)
    276 		return 0;
    277 
    278 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
    279 		if (atomic_read(&adev->vce.handles[i]))
    280 			break;
    281 
    282 	if (i == AMDGPU_MAX_VCE_HANDLES)
    283 		return 0;
    284 
    285 	/* TODO: suspending running encoding sessions isn't supported */
    286 	return -EINVAL;
    287 }
    288 
    289 /**
    290  * amdgpu_vce_resume - pin VCE fw memory
    291  *
    292  * @adev: amdgpu_device pointer
    293  *
    294  */
    295 int amdgpu_vce_resume(struct amdgpu_device *adev)
    296 {
    297 	void *cpu_addr;
    298 	const struct common_firmware_header *hdr;
    299 	unsigned offset;
    300 	int r;
    301 
    302 	if (adev->vce.vcpu_bo == NULL)
    303 		return -EINVAL;
    304 
    305 	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
    306 	if (r) {
    307 		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
    308 		return r;
    309 	}
    310 
    311 	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
    312 	if (r) {
    313 		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
    314 		dev_err(adev->dev, "(%d) VCE map failed\n", r);
    315 		return r;
    316 	}
    317 
    318 	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
    319 	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
    320 	memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
    321 		    adev->vce.fw->size - offset);
    322 
    323 	amdgpu_bo_kunmap(adev->vce.vcpu_bo);
    324 
    325 	amdgpu_bo_unreserve(adev->vce.vcpu_bo);
    326 
    327 	return 0;
    328 }
    329 
    330 /**
    331  * amdgpu_vce_idle_work_handler - power off VCE
    332  *
    333  * @work: pointer to work structure
    334  *
    335  * power of VCE when it's not used any more
    336  */
    337 static void amdgpu_vce_idle_work_handler(struct work_struct *work)
    338 {
    339 	struct amdgpu_device *adev =
    340 		container_of(work, struct amdgpu_device, vce.idle_work.work);
    341 	unsigned i, count = 0;
    342 
    343 	for (i = 0; i < adev->vce.num_rings; i++)
    344 		count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
    345 
    346 	if (count == 0) {
    347 		if (adev->pm.dpm_enabled) {
    348 			amdgpu_dpm_enable_vce(adev, false);
    349 		} else {
    350 			amdgpu_asic_set_vce_clocks(adev, 0, 0);
    351 			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
    352 							       AMD_PG_STATE_GATE);
    353 			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
    354 							       AMD_CG_STATE_GATE);
    355 		}
    356 	} else {
    357 		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
    358 	}
    359 }
    360 
    361 /**
    362  * amdgpu_vce_ring_begin_use - power up VCE
    363  *
    364  * @ring: amdgpu ring
    365  *
    366  * Make sure VCE is powerd up when we want to use it
    367  */
    368 void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
    369 {
    370 	struct amdgpu_device *adev = ring->adev;
    371 	bool set_clocks;
    372 
    373 	if (amdgpu_sriov_vf(adev))
    374 		return;
    375 
    376 	mutex_lock(&adev->vce.idle_mutex);
    377 	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
    378 	if (set_clocks) {
    379 		if (adev->pm.dpm_enabled) {
    380 			amdgpu_dpm_enable_vce(adev, true);
    381 		} else {
    382 			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
    383 			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
    384 							       AMD_CG_STATE_UNGATE);
    385 			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
    386 							       AMD_PG_STATE_UNGATE);
    387 
    388 		}
    389 	}
    390 	mutex_unlock(&adev->vce.idle_mutex);
    391 }
    392 
    393 /**
    394  * amdgpu_vce_ring_end_use - power VCE down
    395  *
    396  * @ring: amdgpu ring
    397  *
    398  * Schedule work to power VCE down again
    399  */
    400 void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
    401 {
    402 	if (!amdgpu_sriov_vf(ring->adev))
    403 		schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
    404 }
    405 
    406 /**
    407  * amdgpu_vce_free_handles - free still open VCE handles
    408  *
    409  * @adev: amdgpu_device pointer
    410  * @filp: drm file pointer
    411  *
    412  * Close all VCE handles still open by this file pointer
    413  */
    414 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
    415 {
    416 	struct amdgpu_ring *ring = &adev->vce.ring[0];
    417 	int i, r;
    418 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
    419 		uint32_t handle = atomic_read(&adev->vce.handles[i]);
    420 
    421 		if (!handle || adev->vce.filp[i] != filp)
    422 			continue;
    423 
    424 		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
    425 		if (r)
    426 			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
    427 
    428 		adev->vce.filp[i] = NULL;
    429 		atomic_set(&adev->vce.handles[i], 0);
    430 	}
    431 }
    432 
    433 /**
    434  * amdgpu_vce_get_create_msg - generate a VCE create msg
    435  *
    436  * @adev: amdgpu_device pointer
    437  * @ring: ring we should submit the msg to
    438  * @handle: VCE session handle to use
    439  * @fence: optional fence to return
    440  *
    441  * Open up a stream for HW test
    442  */
    443 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
    444 				     struct amdgpu_bo *bo,
    445 				     struct dma_fence **fence)
    446 {
    447 	const unsigned ib_size_dw = 1024;
    448 	struct amdgpu_job *job;
    449 	struct amdgpu_ib *ib;
    450 	struct dma_fence *f = NULL;
    451 	uint64_t addr;
    452 	int i, r;
    453 
    454 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
    455 	if (r)
    456 		return r;
    457 
    458 	ib = &job->ibs[0];
    459 
    460 	addr = amdgpu_bo_gpu_offset(bo);
    461 
    462 	/* stitch together an VCE create msg */
    463 	ib->length_dw = 0;
    464 	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
    465 	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
    466 	ib->ptr[ib->length_dw++] = handle;
    467 
    468 	if ((ring->adev->vce.fw_version >> 24) >= 52)
    469 		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
    470 	else
    471 		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
    472 	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
    473 	ib->ptr[ib->length_dw++] = 0x00000000;
    474 	ib->ptr[ib->length_dw++] = 0x00000042;
    475 	ib->ptr[ib->length_dw++] = 0x0000000a;
    476 	ib->ptr[ib->length_dw++] = 0x00000001;
    477 	ib->ptr[ib->length_dw++] = 0x00000080;
    478 	ib->ptr[ib->length_dw++] = 0x00000060;
    479 	ib->ptr[ib->length_dw++] = 0x00000100;
    480 	ib->ptr[ib->length_dw++] = 0x00000100;
    481 	ib->ptr[ib->length_dw++] = 0x0000000c;
    482 	ib->ptr[ib->length_dw++] = 0x00000000;
    483 	if ((ring->adev->vce.fw_version >> 24) >= 52) {
    484 		ib->ptr[ib->length_dw++] = 0x00000000;
    485 		ib->ptr[ib->length_dw++] = 0x00000000;
    486 		ib->ptr[ib->length_dw++] = 0x00000000;
    487 		ib->ptr[ib->length_dw++] = 0x00000000;
    488 	}
    489 
    490 	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
    491 	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
    492 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
    493 	ib->ptr[ib->length_dw++] = addr;
    494 	ib->ptr[ib->length_dw++] = 0x00000001;
    495 
    496 	for (i = ib->length_dw; i < ib_size_dw; ++i)
    497 		ib->ptr[i] = 0x0;
    498 
    499 	r = amdgpu_job_submit_direct(job, ring, &f);
    500 	if (r)
    501 		goto err;
    502 
    503 	if (fence)
    504 		*fence = dma_fence_get(f);
    505 	dma_fence_put(f);
    506 	return 0;
    507 
    508 err:
    509 	amdgpu_job_free(job);
    510 	return r;
    511 }
    512 
    513 /**
    514  * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
    515  *
    516  * @adev: amdgpu_device pointer
    517  * @ring: ring we should submit the msg to
    518  * @handle: VCE session handle to use
    519  * @fence: optional fence to return
    520  *
    521  * Close up a stream for HW test or if userspace failed to do so
    522  */
    523 static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
    524 				      bool direct, struct dma_fence **fence)
    525 {
    526 	const unsigned ib_size_dw = 1024;
    527 	struct amdgpu_job *job;
    528 	struct amdgpu_ib *ib;
    529 	struct dma_fence *f = NULL;
    530 	int i, r;
    531 
    532 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
    533 	if (r)
    534 		return r;
    535 
    536 	ib = &job->ibs[0];
    537 
    538 	/* stitch together an VCE destroy msg */
    539 	ib->length_dw = 0;
    540 	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
    541 	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
    542 	ib->ptr[ib->length_dw++] = handle;
    543 
    544 	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
    545 	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
    546 	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */
    547 	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
    548 	ib->ptr[ib->length_dw++] = 0x00000000;
    549 	ib->ptr[ib->length_dw++] = 0x00000000;
    550 	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
    551 	ib->ptr[ib->length_dw++] = 0x00000000;
    552 
    553 	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
    554 	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */
    555 
    556 	for (i = ib->length_dw; i < ib_size_dw; ++i)
    557 		ib->ptr[i] = 0x0;
    558 
    559 	if (direct)
    560 		r = amdgpu_job_submit_direct(job, ring, &f);
    561 	else
    562 		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
    563 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
    564 	if (r)
    565 		goto err;
    566 
    567 	if (fence)
    568 		*fence = dma_fence_get(f);
    569 	dma_fence_put(f);
    570 	return 0;
    571 
    572 err:
    573 	amdgpu_job_free(job);
    574 	return r;
    575 }
    576 
    577 /**
    578  * amdgpu_vce_cs_validate_bo - make sure not to cross 4GB boundary
    579  *
    580  * @p: parser context
    581  * @lo: address of lower dword
    582  * @hi: address of higher dword
    583  * @size: minimum size
    584  * @index: bs/fb index
    585  *
    586  * Make sure that no BO cross a 4GB boundary.
    587  */
    588 static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
    589 				  int lo, int hi, unsigned size, int32_t index)
    590 {
    591 	int64_t offset = ((uint64_t)size) * ((int64_t)index);
    592 	struct ttm_operation_ctx ctx = { false, false };
    593 	struct amdgpu_bo_va_mapping *mapping;
    594 	unsigned i, fpfn, lpfn;
    595 	struct amdgpu_bo *bo;
    596 	uint64_t addr;
    597 	int r;
    598 
    599 	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
    600 	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
    601 	if (index >= 0) {
    602 		addr += offset;
    603 		fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
    604 		lpfn = 0x100000000ULL >> PAGE_SHIFT;
    605 	} else {
    606 		fpfn = 0;
    607 		lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
    608 	}
    609 
    610 	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
    611 	if (r) {
    612 		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
    613 			  addr, lo, hi, size, index);
    614 		return r;
    615 	}
    616 
    617 	for (i = 0; i < bo->placement.num_placement; ++i) {
    618 		bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
    619 		bo->placements[i].lpfn = bo->placements[i].lpfn ?
    620 			min(bo->placements[i].lpfn, lpfn) : lpfn;
    621 	}
    622 	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
    623 }
    624 
    625 
    626 /**
    627  * amdgpu_vce_cs_reloc - command submission relocation
    628  *
    629  * @p: parser context
    630  * @lo: address of lower dword
    631  * @hi: address of higher dword
    632  * @size: minimum size
    633  *
    634  * Patch relocation inside command stream with real buffer address
    635  */
    636 static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
    637 			       int lo, int hi, unsigned size, uint32_t index)
    638 {
    639 	struct amdgpu_bo_va_mapping *mapping;
    640 	struct amdgpu_bo *bo;
    641 	uint64_t addr;
    642 	int r;
    643 
    644 	if (index == 0xffffffff)
    645 		index = 0;
    646 
    647 	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
    648 	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
    649 	addr += ((uint64_t)size) * ((uint64_t)index);
    650 
    651 	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
    652 	if (r) {
    653 		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
    654 			  addr, lo, hi, size, index);
    655 		return r;
    656 	}
    657 
    658 	if ((addr + (uint64_t)size) >
    659 	    (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
    660 		DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
    661 			  addr, lo, hi);
    662 		return -EINVAL;
    663 	}
    664 
    665 	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
    666 	addr += amdgpu_bo_gpu_offset(bo);
    667 	addr -= ((uint64_t)size) * ((uint64_t)index);
    668 
    669 	amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
    670 	amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));
    671 
    672 	return 0;
    673 }
    674 
    675 /**
    676  * amdgpu_vce_validate_handle - validate stream handle
    677  *
    678  * @p: parser context
    679  * @handle: handle to validate
    680  * @allocated: allocated a new handle?
    681  *
    682  * Validates the handle and return the found session index or -EINVAL
    683  * we we don't have another free session index.
    684  */
    685 static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
    686 				      uint32_t handle, uint32_t *allocated)
    687 {
    688 	unsigned i;
    689 
    690 	/* validate the handle */
    691 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
    692 		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
    693 			if (p->adev->vce.filp[i] != p->filp) {
    694 				DRM_ERROR("VCE handle collision detected!\n");
    695 				return -EINVAL;
    696 			}
    697 			return i;
    698 		}
    699 	}
    700 
    701 	/* handle not found try to alloc a new one */
    702 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
    703 		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
    704 			p->adev->vce.filp[i] = p->filp;
    705 			p->adev->vce.img_size[i] = 0;
    706 			*allocated |= 1 << i;
    707 			return i;
    708 		}
    709 	}
    710 
    711 	DRM_ERROR("No more free VCE handles!\n");
    712 	return -EINVAL;
    713 }
    714 
    715 /**
    716  * amdgpu_vce_cs_parse - parse and validate the command stream
    717  *
    718  * @p: parser context
    719  *
    720  */
    721 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
    722 {
    723 	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
    724 	unsigned fb_idx = 0, bs_idx = 0;
    725 	int session_idx = -1;
    726 	uint32_t destroyed = 0;
    727 	uint32_t created = 0;
    728 	uint32_t allocated = 0;
    729 	uint32_t tmp, handle = 0;
    730 	uint32_t *size = &tmp;
    731 	unsigned idx;
    732 	int i, r = 0;
    733 
    734 	p->job->vm = NULL;
    735 	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
    736 
    737 	for (idx = 0; idx < ib->length_dw;) {
    738 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
    739 		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
    740 
    741 		if ((len < 8) || (len & 3)) {
    742 			DRM_ERROR("invalid VCE command length (%d)!\n", len);
    743 			r = -EINVAL;
    744 			goto out;
    745 		}
    746 
    747 		switch (cmd) {
    748 		case 0x00000002: /* task info */
    749 			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
    750 			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
    751 			break;
    752 
    753 		case 0x03000001: /* encode */
    754 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
    755 						   idx + 9, 0, 0);
    756 			if (r)
    757 				goto out;
    758 
    759 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
    760 						   idx + 11, 0, 0);
    761 			if (r)
    762 				goto out;
    763 			break;
    764 
    765 		case 0x05000001: /* context buffer */
    766 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
    767 						   idx + 2, 0, 0);
    768 			if (r)
    769 				goto out;
    770 			break;
    771 
    772 		case 0x05000004: /* video bitstream buffer */
    773 			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
    774 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
    775 						   tmp, bs_idx);
    776 			if (r)
    777 				goto out;
    778 			break;
    779 
    780 		case 0x05000005: /* feedback buffer */
    781 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
    782 						   4096, fb_idx);
    783 			if (r)
    784 				goto out;
    785 			break;
    786 
    787 		case 0x0500000d: /* MV buffer */
    788 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
    789 							idx + 2, 0, 0);
    790 			if (r)
    791 				goto out;
    792 
    793 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
    794 							idx + 7, 0, 0);
    795 			if (r)
    796 				goto out;
    797 			break;
    798 		}
    799 
    800 		idx += len / 4;
    801 	}
    802 
    803 	for (idx = 0; idx < ib->length_dw;) {
    804 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
    805 		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
    806 
    807 		switch (cmd) {
    808 		case 0x00000001: /* session */
    809 			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
    810 			session_idx = amdgpu_vce_validate_handle(p, handle,
    811 								 &allocated);
    812 			if (session_idx < 0) {
    813 				r = session_idx;
    814 				goto out;
    815 			}
    816 			size = &p->adev->vce.img_size[session_idx];
    817 			break;
    818 
    819 		case 0x00000002: /* task info */
    820 			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
    821 			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
    822 			break;
    823 
    824 		case 0x01000001: /* create */
    825 			created |= 1 << session_idx;
    826 			if (destroyed & (1 << session_idx)) {
    827 				destroyed &= ~(1 << session_idx);
    828 				allocated |= 1 << session_idx;
    829 
    830 			} else if (!(allocated & (1 << session_idx))) {
    831 				DRM_ERROR("Handle already in use!\n");
    832 				r = -EINVAL;
    833 				goto out;
    834 			}
    835 
    836 			*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
    837 				amdgpu_get_ib_value(p, ib_idx, idx + 10) *
    838 				8 * 3 / 2;
    839 			break;
    840 
    841 		case 0x04000001: /* config extension */
    842 		case 0x04000002: /* pic control */
    843 		case 0x04000005: /* rate control */
    844 		case 0x04000007: /* motion estimation */
    845 		case 0x04000008: /* rdo */
    846 		case 0x04000009: /* vui */
    847 		case 0x05000002: /* auxiliary buffer */
    848 		case 0x05000009: /* clock table */
    849 			break;
    850 
    851 		case 0x0500000c: /* hw config */
    852 			switch (p->adev->asic_type) {
    853 #ifdef CONFIG_DRM_AMDGPU_CIK
    854 			case CHIP_KAVERI:
    855 			case CHIP_MULLINS:
    856 #endif
    857 			case CHIP_CARRIZO:
    858 				break;
    859 			default:
    860 				r = -EINVAL;
    861 				goto out;
    862 			}
    863 			break;
    864 
    865 		case 0x03000001: /* encode */
    866 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
    867 						*size, 0);
    868 			if (r)
    869 				goto out;
    870 
    871 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
    872 						*size / 3, 0);
    873 			if (r)
    874 				goto out;
    875 			break;
    876 
    877 		case 0x02000001: /* destroy */
    878 			destroyed |= 1 << session_idx;
    879 			break;
    880 
    881 		case 0x05000001: /* context buffer */
    882 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
    883 						*size * 2, 0);
    884 			if (r)
    885 				goto out;
    886 			break;
    887 
    888 		case 0x05000004: /* video bitstream buffer */
    889 			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
    890 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
    891 						tmp, bs_idx);
    892 			if (r)
    893 				goto out;
    894 			break;
    895 
    896 		case 0x05000005: /* feedback buffer */
    897 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
    898 						4096, fb_idx);
    899 			if (r)
    900 				goto out;
    901 			break;
    902 
    903 		case 0x0500000d: /* MV buffer */
    904 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
    905 							idx + 2, *size, 0);
    906 			if (r)
    907 				goto out;
    908 
    909 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
    910 							idx + 7, *size / 12, 0);
    911 			if (r)
    912 				goto out;
    913 			break;
    914 
    915 		default:
    916 			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
    917 			r = -EINVAL;
    918 			goto out;
    919 		}
    920 
    921 		if (session_idx == -1) {
    922 			DRM_ERROR("no session command at start of IB\n");
    923 			r = -EINVAL;
    924 			goto out;
    925 		}
    926 
    927 		idx += len / 4;
    928 	}
    929 
    930 	if (allocated & ~created) {
    931 		DRM_ERROR("New session without create command!\n");
    932 		r = -ENOENT;
    933 	}
    934 
    935 out:
    936 	if (!r) {
    937 		/* No error, free all destroyed handle slots */
    938 		tmp = destroyed;
    939 	} else {
    940 		/* Error during parsing, free all allocated handle slots */
    941 		tmp = allocated;
    942 	}
    943 
    944 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
    945 		if (tmp & (1 << i))
    946 			atomic_set(&p->adev->vce.handles[i], 0);
    947 
    948 	return r;
    949 }
    950 
    951 /**
    952  * amdgpu_vce_cs_parse_vm - parse the command stream in VM mode
    953  *
    954  * @p: parser context
    955  *
    956  */
    957 int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
    958 {
    959 	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
    960 	int session_idx = -1;
    961 	uint32_t destroyed = 0;
    962 	uint32_t created = 0;
    963 	uint32_t allocated = 0;
    964 	uint32_t tmp, handle = 0;
    965 	int i, r = 0, idx = 0;
    966 
    967 	while (idx < ib->length_dw) {
    968 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
    969 		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
    970 
    971 		if ((len < 8) || (len & 3)) {
    972 			DRM_ERROR("invalid VCE command length (%d)!\n", len);
    973 			r = -EINVAL;
    974 			goto out;
    975 		}
    976 
    977 		switch (cmd) {
    978 		case 0x00000001: /* session */
    979 			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
    980 			session_idx = amdgpu_vce_validate_handle(p, handle,
    981 								 &allocated);
    982 			if (session_idx < 0) {
    983 				r = session_idx;
    984 				goto out;
    985 			}
    986 			break;
    987 
    988 		case 0x01000001: /* create */
    989 			created |= 1 << session_idx;
    990 			if (destroyed & (1 << session_idx)) {
    991 				destroyed &= ~(1 << session_idx);
    992 				allocated |= 1 << session_idx;
    993 
    994 			} else if (!(allocated & (1 << session_idx))) {
    995 				DRM_ERROR("Handle already in use!\n");
    996 				r = -EINVAL;
    997 				goto out;
    998 			}
    999 
   1000 			break;
   1001 
   1002 		case 0x02000001: /* destroy */
   1003 			destroyed |= 1 << session_idx;
   1004 			break;
   1005 
   1006 		default:
   1007 			break;
   1008 		}
   1009 
   1010 		if (session_idx == -1) {
   1011 			DRM_ERROR("no session command at start of IB\n");
   1012 			r = -EINVAL;
   1013 			goto out;
   1014 		}
   1015 
   1016 		idx += len / 4;
   1017 	}
   1018 
   1019 	if (allocated & ~created) {
   1020 		DRM_ERROR("New session without create command!\n");
   1021 		r = -ENOENT;
   1022 	}
   1023 
   1024 out:
   1025 	if (!r) {
   1026 		/* No error, free all destroyed handle slots */
   1027 		tmp = destroyed;
   1028 		amdgpu_ib_free(p->adev, ib, NULL);
   1029 	} else {
   1030 		/* Error during parsing, free all allocated handle slots */
   1031 		tmp = allocated;
   1032 	}
   1033 
   1034 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
   1035 		if (tmp & (1 << i))
   1036 			atomic_set(&p->adev->vce.handles[i], 0);
   1037 
   1038 	return r;
   1039 }
   1040 
   1041 /**
   1042  * amdgpu_vce_ring_emit_ib - execute indirect buffer
   1043  *
   1044  * @ring: engine to use
   1045  * @ib: the IB to execute
   1046  *
   1047  */
   1048 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
   1049 				struct amdgpu_job *job,
   1050 				struct amdgpu_ib *ib,
   1051 				uint32_t flags)
   1052 {
   1053 	amdgpu_ring_write(ring, VCE_CMD_IB);
   1054 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
   1055 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
   1056 	amdgpu_ring_write(ring, ib->length_dw);
   1057 }
   1058 
   1059 /**
   1060  * amdgpu_vce_ring_emit_fence - add a fence command to the ring
   1061  *
   1062  * @ring: engine to use
   1063  * @fence: the fence
   1064  *
   1065  */
   1066 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
   1067 				unsigned flags)
   1068 {
   1069 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
   1070 
   1071 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
   1072 	amdgpu_ring_write(ring, addr);
   1073 	amdgpu_ring_write(ring, upper_32_bits(addr));
   1074 	amdgpu_ring_write(ring, seq);
   1075 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
   1076 	amdgpu_ring_write(ring, VCE_CMD_END);
   1077 }
   1078 
   1079 /**
   1080  * amdgpu_vce_ring_test_ring - test if VCE ring is working
   1081  *
   1082  * @ring: the engine to test on
   1083  *
   1084  */
   1085 int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
   1086 {
   1087 	struct amdgpu_device *adev = ring->adev;
   1088 	uint32_t rptr;
   1089 	unsigned i;
   1090 	int r, timeout = adev->usec_timeout;
   1091 
   1092 	/* skip ring test for sriov*/
   1093 	if (amdgpu_sriov_vf(adev))
   1094 		return 0;
   1095 
   1096 	r = amdgpu_ring_alloc(ring, 16);
   1097 	if (r)
   1098 		return r;
   1099 
   1100 	rptr = amdgpu_ring_get_rptr(ring);
   1101 
   1102 	amdgpu_ring_write(ring, VCE_CMD_END);
   1103 	amdgpu_ring_commit(ring);
   1104 
   1105 	for (i = 0; i < timeout; i++) {
   1106 		if (amdgpu_ring_get_rptr(ring) != rptr)
   1107 			break;
   1108 		udelay(1);
   1109 	}
   1110 
   1111 	if (i >= timeout)
   1112 		r = -ETIMEDOUT;
   1113 
   1114 	return r;
   1115 }
   1116 
   1117 /**
   1118  * amdgpu_vce_ring_test_ib - test if VCE IBs are working
   1119  *
   1120  * @ring: the engine to test on
   1121  *
   1122  */
   1123 int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
   1124 {
   1125 	struct dma_fence *fence = NULL;
   1126 	struct amdgpu_bo *bo = NULL;
   1127 	long r;
   1128 
   1129 	/* skip vce ring1/2 ib test for now, since it's not reliable */
   1130 	if (ring != &ring->adev->vce.ring[0])
   1131 		return 0;
   1132 
   1133 	r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
   1134 				      AMDGPU_GEM_DOMAIN_VRAM,
   1135 				      &bo, NULL, NULL);
   1136 	if (r)
   1137 		return r;
   1138 
   1139 	r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
   1140 	if (r)
   1141 		goto error;
   1142 
   1143 	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
   1144 	if (r)
   1145 		goto error;
   1146 
   1147 	r = dma_fence_wait_timeout(fence, false, timeout);
   1148 	if (r == 0)
   1149 		r = -ETIMEDOUT;
   1150 	else if (r > 0)
   1151 		r = 0;
   1152 
   1153 error:
   1154 	dma_fence_put(fence);
   1155 	amdgpu_bo_unreserve(bo);
   1156 	amdgpu_bo_unref(&bo);
   1157 	return r;
   1158 }
   1159