      1 /*	$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2016 Advanced Micro Devices, Inc.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * The above copyright notice and this permission notice (including the
     24  * next paragraph) shall be included in all copies or substantial portions
     25  * of the Software.
     26  *
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $");
     31 
     32 #include <linux/firmware.h>
     33 
     34 #include "amdgpu.h"
     35 #include "amdgpu_vce.h"
     36 #include "soc15.h"
     37 #include "soc15d.h"
     38 #include "soc15_common.h"
     39 #include "mmsch_v1_0.h"
     40 
     41 #include "vce/vce_4_0_offset.h"
     42 #include "vce/vce_4_0_default.h"
     43 #include "vce/vce_4_0_sh_mask.h"
     44 #include "mmhub/mmhub_1_0_offset.h"
     45 #include "mmhub/mmhub_1_0_sh_mask.h"
     46 
     47 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
     48 
     49 #include <linux/nbsd-namespace.h>
     50 
     51 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
     52 
     53 #define VCE_V4_0_FW_SIZE	(384 * 1024)
     54 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
     55 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
     56 
     57 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
     58 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
     59 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
     60 
     61 /**
     62  * vce_v4_0_ring_get_rptr - get read pointer
     63  *
     64  * @ring: amdgpu_ring pointer
     65  *
     66  * Returns the current hardware read pointer
     67  */
     68 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
     69 {
     70 	struct amdgpu_device *adev = ring->adev;
     71 
     72 	if (ring->me == 0)
     73 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
     74 	else if (ring->me == 1)
     75 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
     76 	else
     77 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
     78 }
     79 
     80 /**
     81  * vce_v4_0_ring_get_wptr - get write pointer
     82  *
     83  * @ring: amdgpu_ring pointer
     84  *
     85  * Returns the current hardware write pointer
     86  */
     87 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
     88 {
     89 	struct amdgpu_device *adev = ring->adev;
     90 
     91 	if (ring->use_doorbell)
     92 		return adev->wb.wb[ring->wptr_offs];
     93 
     94 	if (ring->me == 0)
     95 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
     96 	else if (ring->me == 1)
     97 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
     98 	else
     99 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
    100 }
    101 
    102 /**
    103  * vce_v4_0_ring_set_wptr - set write pointer
    104  *
    105  * @ring: amdgpu_ring pointer
    106  *
    107  * Commits the write pointer to the hardware
    108  */
    109 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
    110 {
    111 	struct amdgpu_device *adev = ring->adev;
    112 
    113 	if (ring->use_doorbell) {
    114 		/* XXX check if swapping is necessary on BE */
    115 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
    116 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
    117 		return;
    118 	}
    119 
    120 	if (ring->me == 0)
    121 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
    122 			lower_32_bits(ring->wptr));
    123 	else if (ring->me == 1)
    124 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
    125 			lower_32_bits(ring->wptr));
    126 	else
    127 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
    128 			lower_32_bits(ring->wptr));
    129 }
    130 
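         /**
          * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
          *
          * @adev: amdgpu_device pointer
          *
          * Polls VCE_STATUS for the FW_LOADED bit, soft-resetting the ECPU
          * between retries.  Returns 0 once the firmware reports loaded,
          * -ETIMEDOUT otherwise.
          */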
    131 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
    132 {
    133 	int i, j;
    134 
    135 	for (i = 0; i < 10; ++i) {
    136 		for (j = 0; j < 100; ++j) {
    137 			uint32_t status =
    138 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
    139 
    140 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
    141 				return 0;
    142 			mdelay(10);
    143 		}
    144 
    145 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
    146 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
    147 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
    148 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
    149 		mdelay(10);
    150 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
    151 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
    152 		mdelay(10);
    153 
    154 	}
    155 
    156 	return -ETIMEDOUT;
    157 }
    158 
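         /**
          * vce_v4_0_mmsch_start - kick off the MM scheduler under SRIOV
          *
          * @adev: amdgpu_device pointer
          * @table: memory descriptor table to hand to the MMSCH
          *
          * Programs the descriptor address, VMID and size, then waits for the
          * MMSCH mailbox to acknowledge initialization.
          */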
    159 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
    160 				struct amdgpu_mm_table *table)
    161 {
    162 	uint32_t data = 0, loop;
    163 	uint64_t addr = table->gpu_addr;
    164 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
    165 	uint32_t size;
    166 
    167 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
    168 
    169 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
    170 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
    171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
    172 
    173 	/* 2, update vmid of descriptor */
    174 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
    175 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
    176 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
    177 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
    178 
    179 	/* 3, notify mmsch about the size of this descriptor */
    180 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
    181 
    182 	/* 4, set resp to zero */
    183 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
    184 
    185 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
    186 	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
    187 	adev->vce.ring[0].wptr = 0;
    188 	adev->vce.ring[0].wptr_old = 0;
    189 
    190 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
    191 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
    192 
    193 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
    194 	loop = 1000;
    195 	while ((data & 0x10000002) != 0x10000002) {
    196 		udelay(10);
    197 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
    198 		loop--;
    199 		if (!loop)
    200 			break;
    201 	}
    202 
    203 	if (!loop) {
    204 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
    205 		return -EBUSY;
    206 	}
    207 
    208 	return 0;
    209 }
    210 
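         /**
          * vce_v4_0_sriov_start - start VCE block under SRIOV
          *
          * @adev: amdgpu_device pointer
          *
          * Builds the MMSCH init table (ring setup, MC resume programming and
          * the end packet) and hands it to vce_v4_0_mmsch_start().
          */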
    211 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
    212 {
    213 	struct amdgpu_ring *ring;
    214 	uint32_t offset, size;
    215 	uint32_t table_size = 0;
    216 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
    217 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
    218 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
    219 	struct mmsch_v1_0_cmd_end end = { { 0 } };
    220 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
    221 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
    222 
    223 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
    224 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
    225 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
    226 	end.cmd_header.command_type = MMSCH_COMMAND__END;
    227 
    228 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
    229 		header->version = MMSCH_VERSION;
    230 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
    231 
    232 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
    233 			header->vce_table_offset = header->header_size;
    234 		else
    235 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
    236 
    237 		init_table += header->vce_table_offset;
    238 
    239 		ring = &adev->vce.ring[0];
    240 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
    241 					    lower_32_bits(ring->gpu_addr));
    242 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
    243 					    upper_32_bits(ring->gpu_addr));
    244 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
    245 					    ring->ring_size / 4);
    246 
     247 		/* begin of MC_RESUME */
    248 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
    249 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
    250 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
    251 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
    252 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
    253 
    254 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
    255 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
    256 			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
    257 			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
    258 			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
    259 
    260 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
    261 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
    262 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
    263 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
    264 						(tmr_mc_addr >> 40) & 0xff);
    265 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
    266 		} else {
    267 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
    268 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
    269 						adev->vce.gpu_addr >> 8);
    270 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
    271 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
    272 						(adev->vce.gpu_addr >> 40) & 0xff);
    273 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
    274 						offset & ~0x0f000000);
    275 
    276 		}
    277 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
    278 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
    279 						adev->vce.gpu_addr >> 8);
    280 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
    281 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
    282 						(adev->vce.gpu_addr >> 40) & 0xff);
    283 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
    284 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
    285 						adev->vce.gpu_addr >> 8);
    286 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
    287 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
    288 						(adev->vce.gpu_addr >> 40) & 0xff);
    289 
    290 		size = VCE_V4_0_FW_SIZE;
    291 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
    292 
    293 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
    294 		size = VCE_V4_0_STACK_SIZE;
    295 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
    296 					(offset & ~0x0f000000) | (1 << 24));
    297 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
    298 
    299 		offset += size;
    300 		size = VCE_V4_0_DATA_SIZE;
    301 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
    302 					(offset & ~0x0f000000) | (2 << 24));
    303 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
    304 
    305 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
    306 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
    307 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
    308 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
    309 
    310 		/* end of MC_RESUME */
    311 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
    312 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
    313 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
    314 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
    315 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
    316 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
    317 
    318 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
    319 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
    320 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
    321 
    322 		/* clear BUSY flag */
    323 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
    324 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
    325 
    326 		/* add end packet */
    327 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
    328 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
    329 		header->vce_table_size = table_size;
    330 	}
    331 
    332 	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
    333 }
    334 
    335 /**
    336  * vce_v4_0_start - start VCE block
    337  *
    338  * @adev: amdgpu_device pointer
    339  *
    340  * Setup and start the VCE block
    341  */
    342 static int vce_v4_0_start(struct amdgpu_device *adev)
    343 {
    344 	struct amdgpu_ring *ring;
    345 	int r;
    346 
    347 	ring = &adev->vce.ring[0];
    348 
    349 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
    350 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
    351 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
    352 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
    353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
    354 
    355 	ring = &adev->vce.ring[1];
    356 
    357 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
    358 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
    359 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
    360 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
    361 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
    362 
    363 	ring = &adev->vce.ring[2];
    364 
    365 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
    366 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
    367 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
    368 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
    369 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
    370 
    371 	vce_v4_0_mc_resume(adev);
    372 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
    373 			~VCE_STATUS__JOB_BUSY_MASK);
    374 
    375 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
    376 
    377 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
    378 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
    379 	mdelay(100);
    380 
    381 	r = vce_v4_0_firmware_loaded(adev);
    382 
    383 	/* clear BUSY flag */
    384 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
    385 
    386 	if (r) {
    387 		DRM_ERROR("VCE not responding, giving up!!!\n");
    388 		return r;
    389 	}
    390 
    391 	return 0;
    392 }
    393 
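         /**
          * vce_v4_0_stop - stop VCE block
          *
          * @adev: amdgpu_device pointer
          *
          * Disables the VCPU, holds the ECPU in soft reset and clears VCE_STATUS.
          */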
    394 static int vce_v4_0_stop(struct amdgpu_device *adev)
    395 {
    396 
    397 	/* Disable VCPU */
    398 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
    399 
    400 	/* hold on ECPU */
    401 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
    402 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
    403 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
    404 
    405 	/* clear VCE_STATUS */
    406 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
    407 
    408 	/* Set Clock-Gating off */
    409 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
    410 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
    411 	*/
    412 
    413 	return 0;
    414 }
    415 
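         /**
          * vce_v4_0_early_init - set up ring and irq callbacks
          *
          * @handle: amdgpu_device pointer
          *
          * Picks the number of rings (one under SRIOV, three otherwise) and
          * installs the ring and interrupt function tables.
          */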
    416 static int vce_v4_0_early_init(void *handle)
    417 {
    418 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    419 
     420 	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
    421 		adev->vce.num_rings = 1;
    422 	else
    423 		adev->vce.num_rings = 3;
    424 
    425 	vce_v4_0_set_ring_funcs(adev);
    426 	vce_v4_0_set_irq_funcs(adev);
    427 
    428 	return 0;
    429 }
    430 
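         /**
          * vce_v4_0_sw_init - software init
          *
          * @handle: amdgpu_device pointer
          *
          * Registers the VCE interrupt source, allocates the firmware, stack
          * and data BO, sets up PSP firmware loading if used, and initializes
          * the rings and the SRIOV MM table.
          */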
    431 static int vce_v4_0_sw_init(void *handle)
    432 {
    433 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    434 	struct amdgpu_ring *ring;
    435 
    436 	unsigned size;
    437 	int r, i;
    438 
    439 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
    440 	if (r)
    441 		return r;
    442 
    443 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
    444 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
    445 		size += VCE_V4_0_FW_SIZE;
    446 
    447 	r = amdgpu_vce_sw_init(adev, size);
    448 	if (r)
    449 		return r;
    450 
    451 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
    452 		const struct common_firmware_header *hdr;
    453 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
    454 
    455 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
    456 		if (!adev->vce.saved_bo)
    457 			return -ENOMEM;
    458 
    459 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
    460 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
    461 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
    462 		adev->firmware.fw_size +=
    463 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
    464 		DRM_INFO("PSP loading VCE firmware\n");
    465 	} else {
    466 		r = amdgpu_vce_resume(adev);
    467 		if (r)
    468 			return r;
    469 	}
    470 
    471 	for (i = 0; i < adev->vce.num_rings; i++) {
    472 		ring = &adev->vce.ring[i];
    473 		snprintf(ring->name, sizeof(ring->name), "vce%d", i);
    474 		if (amdgpu_sriov_vf(adev)) {
    475 			/* DOORBELL only works under SRIOV */
    476 			ring->use_doorbell = true;
    477 
     478 			/* currently only the first encoding ring is used under SRIOV,
     479 			 * so point the remaining rings at an unused doorbell location.
     480 			 */
    481 			if (i == 0)
    482 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
    483 			else
    484 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
    485 		}
    486 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
    487 		if (r)
    488 			return r;
    489 	}
    490 
    491 
    492 	r = amdgpu_vce_entity_init(adev);
    493 	if (r)
    494 		return r;
    495 
    496 	r = amdgpu_virt_alloc_mm_table(adev);
    497 	if (r)
    498 		return r;
    499 
    500 	return r;
    501 }
    502 
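         /**
          * vce_v4_0_sw_fini - software teardown
          *
          * @handle: amdgpu_device pointer
          *
          * Frees the MM table and the saved firmware image, then tears down
          * the common VCE state.
          */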
    503 static int vce_v4_0_sw_fini(void *handle)
    504 {
    505 	int r;
    506 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    507 
    508 	/* free MM table */
    509 	amdgpu_virt_free_mm_table(adev);
    510 
    511 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
    512 		kvfree(adev->vce.saved_bo);
    513 		adev->vce.saved_bo = NULL;
    514 	}
    515 
    516 	r = amdgpu_vce_suspend(adev);
    517 	if (r)
    518 		return r;
    519 
    520 	return amdgpu_vce_sw_fini(adev);
    521 }
    522 
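         /**
          * vce_v4_0_hw_init - start the hardware and test the rings
          *
          * @handle: amdgpu_device pointer
          *
          * Starts VCE (via the MMSCH under SRIOV) and runs a ring test on
          * each ring.
          */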
    523 static int vce_v4_0_hw_init(void *handle)
    524 {
    525 	int r, i;
    526 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    527 
    528 	if (amdgpu_sriov_vf(adev))
    529 		r = vce_v4_0_sriov_start(adev);
    530 	else
    531 		r = vce_v4_0_start(adev);
    532 	if (r)
    533 		return r;
    534 
    535 	for (i = 0; i < adev->vce.num_rings; i++) {
    536 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
    537 		if (r)
    538 			return r;
    539 	}
    540 
    541 	DRM_INFO("VCE initialized successfully.\n");
    542 
    543 	return 0;
    544 }
    545 
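         /**
          * vce_v4_0_hw_fini - stop the hardware
          *
          * @handle: amdgpu_device pointer
          *
          * Stops VCE on bare metal (registers are left untouched under SRIOV)
          * and marks all rings as not ready.
          */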
    546 static int vce_v4_0_hw_fini(void *handle)
    547 {
    548 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    549 	int i;
    550 
    551 	if (!amdgpu_sriov_vf(adev)) {
    552 		/* vce_v4_0_wait_for_idle(handle); */
    553 		vce_v4_0_stop(adev);
    554 	} else {
    555 		/* full access mode, so don't touch any VCE register */
    556 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
    557 	}
    558 
    559 	for (i = 0; i < adev->vce.num_rings; i++)
    560 		adev->vce.ring[i].sched.ready = false;
    561 
    562 	return 0;
    563 }
    564 
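         /**
          * vce_v4_0_suspend - save state and stop the hardware
          *
          * @handle: amdgpu_device pointer
          *
          * With PSP firmware loading the VCPU BO contents are saved before
          * the hardware is shut down.
          */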
    565 static int vce_v4_0_suspend(void *handle)
    566 {
    567 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    568 	int r;
    569 
    570 	if (adev->vce.vcpu_bo == NULL)
    571 		return 0;
    572 
    573 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
    574 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
    575 		void *ptr = adev->vce.cpu_addr;
    576 
    577 		memcpy_fromio(adev->vce.saved_bo, ptr, size);
    578 	}
    579 
    580 	r = vce_v4_0_hw_fini(adev);
    581 	if (r)
    582 		return r;
    583 
    584 	return amdgpu_vce_suspend(adev);
    585 }
    586 
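         /**
          * vce_v4_0_resume - restore state and restart the hardware
          *
          * @handle: amdgpu_device pointer
          *
          * Restores the saved VCPU BO contents (PSP load) or reloads the
          * firmware, then re-runs hw_init.
          */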
    587 static int vce_v4_0_resume(void *handle)
    588 {
    589 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    590 	int r;
    591 
    592 	if (adev->vce.vcpu_bo == NULL)
    593 		return -EINVAL;
    594 
    595 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
    596 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
    597 		void *ptr = adev->vce.cpu_addr;
    598 
    599 		memcpy_toio(ptr, adev->vce.saved_bo, size);
    600 	} else {
    601 		r = amdgpu_vce_resume(adev);
    602 		if (r)
    603 			return r;
    604 	}
    605 
    606 	return vce_v4_0_hw_init(adev);
    607 }
    608 
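         /**
          * vce_v4_0_mc_resume - program the memory controller registers
          *
          * @adev: amdgpu_device pointer
          *
          * Programs clock gating, LMI and VCPU cache offset/size registers
          * for the firmware, stack and data regions.
          */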
    609 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
    610 {
    611 	uint32_t offset, size;
    612 	uint64_t tmr_mc_addr;
    613 
    614 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
    615 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
    616 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
    617 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
    618 
    619 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
    620 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
    621 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
    622 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
    623 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
    624 
    625 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
    626 
    627 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
    628 		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
    629 										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
    630 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
    631 			(tmr_mc_addr >> 8));
    632 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
    633 			(tmr_mc_addr >> 40) & 0xff);
    634 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
    635 	} else {
    636 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
    637 			(adev->vce.gpu_addr >> 8));
    638 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
    639 			(adev->vce.gpu_addr >> 40) & 0xff);
    640 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
    641 	}
    642 
    643 	size = VCE_V4_0_FW_SIZE;
    644 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
    645 
    646 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
    647 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
    648 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
    649 	size = VCE_V4_0_STACK_SIZE;
    650 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
    651 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
    652 
    653 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
    654 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
    655 	offset += size;
    656 	size = VCE_V4_0_DATA_SIZE;
    657 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
    658 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
    659 
    660 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
    661 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
    662 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
    663 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
    664 }
    665 
    666 static int vce_v4_0_set_clockgating_state(void *handle,
    667 					  enum amd_clockgating_state state)
    668 {
    669 	/* needed for driver unload*/
    670 	return 0;
    671 }
    672 
    673 #if 0
    674 static bool vce_v4_0_is_idle(void *handle)
    675 {
    676 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    677 	u32 mask = 0;
    678 
    679 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
    680 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
    681 
    682 	return !(RREG32(mmSRBM_STATUS2) & mask);
    683 }
    684 
    685 static int vce_v4_0_wait_for_idle(void *handle)
    686 {
    687 	unsigned i;
    688 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    689 
    690 	for (i = 0; i < adev->usec_timeout; i++)
    691 		if (vce_v4_0_is_idle(handle))
    692 			return 0;
    693 
    694 	return -ETIMEDOUT;
    695 }
    696 
    697 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
    698 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
    699 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
    700 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
    701 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
    702 
    703 static bool vce_v4_0_check_soft_reset(void *handle)
    704 {
    705 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    706 	u32 srbm_soft_reset = 0;
    707 
     708 	/* According to the VCE team, we should use VCE_STATUS instead of
     709 	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
     710 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
     711 	 * instance's registers are accessed
     712 	 * (0 for the 1st instance, 0x10 for the 2nd instance).
     713 	 *
     714 	 * VCE_STATUS
     715 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
     716 	 * |----+----+-----------+----+----+----+----------+---------+----|
     717 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
     718 	 *
     719 	 * The VCE team suggests using bits 3 to 6 for the busy status check.
     720 	 */
    721 	mutex_lock(&adev->grbm_idx_mutex);
    722 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
     723 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
    724 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
    725 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
    726 	}
    727 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
     728 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
    729 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
    730 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
    731 	}
    732 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
    733 	mutex_unlock(&adev->grbm_idx_mutex);
    734 
    735 	if (srbm_soft_reset) {
    736 		adev->vce.srbm_soft_reset = srbm_soft_reset;
    737 		return true;
    738 	} else {
    739 		adev->vce.srbm_soft_reset = 0;
    740 		return false;
    741 	}
    742 }
    743 
    744 static int vce_v4_0_soft_reset(void *handle)
    745 {
    746 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    747 	u32 srbm_soft_reset;
    748 
    749 	if (!adev->vce.srbm_soft_reset)
    750 		return 0;
    751 	srbm_soft_reset = adev->vce.srbm_soft_reset;
    752 
    753 	if (srbm_soft_reset) {
    754 		u32 tmp;
    755 
    756 		tmp = RREG32(mmSRBM_SOFT_RESET);
    757 		tmp |= srbm_soft_reset;
    758 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
    759 		WREG32(mmSRBM_SOFT_RESET, tmp);
    760 		tmp = RREG32(mmSRBM_SOFT_RESET);
    761 
    762 		udelay(50);
    763 
    764 		tmp &= ~srbm_soft_reset;
    765 		WREG32(mmSRBM_SOFT_RESET, tmp);
    766 		tmp = RREG32(mmSRBM_SOFT_RESET);
    767 
    768 		/* Wait a little for things to settle down */
    769 		udelay(50);
    770 	}
    771 
    772 	return 0;
    773 }
    774 
    775 static int vce_v4_0_pre_soft_reset(void *handle)
    776 {
    777 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    778 
    779 	if (!adev->vce.srbm_soft_reset)
    780 		return 0;
    781 
    782 	mdelay(5);
    783 
    784 	return vce_v4_0_suspend(adev);
    785 }
    786 
    787 
    788 static int vce_v4_0_post_soft_reset(void *handle)
    789 {
    790 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    791 
    792 	if (!adev->vce.srbm_soft_reset)
    793 		return 0;
    794 
    795 	mdelay(5);
    796 
    797 	return vce_v4_0_resume(adev);
    798 }
    799 
    800 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
    801 {
    802 	u32 tmp, data;
    803 
    804 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
    805 	if (override)
    806 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
    807 	else
    808 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
    809 
    810 	if (tmp != data)
    811 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
    812 }
    813 
    814 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
    815 					     bool gated)
    816 {
    817 	u32 data;
    818 
    819 	/* Set Override to disable Clock Gating */
    820 	vce_v4_0_override_vce_clock_gating(adev, true);
    821 
    822 	/* This function enables MGCG which is controlled by firmware.
    823 	   With the clocks in the gated state the core is still
    824 	   accessible but the firmware will throttle the clocks on the
    825 	   fly as necessary.
    826 	*/
    827 	if (gated) {
    828 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
    829 		data |= 0x1ff;
    830 		data &= ~0xef0000;
    831 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
    832 
    833 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
    834 		data |= 0x3ff000;
    835 		data &= ~0xffc00000;
    836 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
    837 
    838 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
    839 		data |= 0x2;
    840 		data &= ~0x00010000;
    841 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
    842 
    843 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
    844 		data |= 0x37f;
    845 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
    846 
    847 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
    848 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
    849 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
    850 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
    851 			0x8;
    852 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
    853 	} else {
    854 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
    855 		data &= ~0x80010;
    856 		data |= 0xe70008;
    857 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
    858 
    859 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
    860 		data |= 0xffc00000;
    861 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
    862 
    863 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
    864 		data |= 0x10000;
    865 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
    866 
    867 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
    868 		data &= ~0xffc00000;
    869 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
    870 
    871 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
    872 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
    873 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
    874 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
    875 			  0x8);
    876 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
    877 	}
    878 	vce_v4_0_override_vce_clock_gating(adev, false);
    879 }
    880 
    881 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
    882 {
    883 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
    884 
    885 	if (enable)
    886 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
    887 	else
    888 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
    889 
    890 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
    891 }
    892 
    893 static int vce_v4_0_set_clockgating_state(void *handle,
    894 					  enum amd_clockgating_state state)
    895 {
    896 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    897 	bool enable = (state == AMD_CG_STATE_GATE);
    898 	int i;
    899 
    900 	if ((adev->asic_type == CHIP_POLARIS10) ||
    901 		(adev->asic_type == CHIP_TONGA) ||
    902 		(adev->asic_type == CHIP_FIJI))
    903 		vce_v4_0_set_bypass_mode(adev, enable);
    904 
    905 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
    906 		return 0;
    907 
    908 	mutex_lock(&adev->grbm_idx_mutex);
    909 	for (i = 0; i < 2; i++) {
    910 		/* Program VCE Instance 0 or 1 if not harvested */
    911 		if (adev->vce.harvest_config & (1 << i))
    912 			continue;
    913 
    914 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
    915 
    916 		if (enable) {
    917 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
     918 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
     919 			data &= ~(0xf | 0xff0);
     920 			data |= ((0x0 << 0) | (0x04 << 4));
     921 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
     922 
     923 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
     924 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
     925 			data &= ~(0xf | 0xff0);
     926 			data |= ((0x0 << 0) | (0x04 << 4));
     927 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
    928 		}
    929 
    930 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
    931 	}
    932 
    933 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
    934 	mutex_unlock(&adev->grbm_idx_mutex);
    935 
    936 	return 0;
    937 }
    938 #endif
    939 
    940 static int vce_v4_0_set_powergating_state(void *handle,
    941 					  enum amd_powergating_state state)
    942 {
    943 	/* This doesn't actually powergate the VCE block.
    944 	 * That's done in the dpm code via the SMC.  This
    945 	 * just re-inits the block as necessary.  The actual
    946 	 * gating still happens in the dpm code.  We should
    947 	 * revisit this when there is a cleaner line between
    948 	 * the smc and the hw blocks
    949 	 */
    950 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    951 
    952 	if (state == AMD_PG_STATE_GATE)
    953 		return vce_v4_0_stop(adev);
    954 	else
    955 		return vce_v4_0_start(adev);
    956 }
    957 
    958 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
    959 					struct amdgpu_ib *ib, uint32_t flags)
    960 {
    961 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
    962 
    963 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
    964 	amdgpu_ring_write(ring, vmid);
    965 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
    966 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
    967 	amdgpu_ring_write(ring, ib->length_dw);
    968 }
    969 
    970 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
    971 			u64 seq, unsigned flags)
    972 {
    973 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
    974 
    975 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
    976 	amdgpu_ring_write(ring, addr);
    977 	amdgpu_ring_write(ring, upper_32_bits(addr));
    978 	amdgpu_ring_write(ring, seq);
    979 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
    980 }
    981 
    982 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
    983 {
    984 	amdgpu_ring_write(ring, VCE_CMD_END);
    985 }
    986 
    987 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
    988 				   uint32_t val, uint32_t mask)
    989 {
    990 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
    991 	amdgpu_ring_write(ring,	reg << 2);
    992 	amdgpu_ring_write(ring, mask);
    993 	amdgpu_ring_write(ring, val);
    994 }
    995 
    996 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
    997 				   unsigned int vmid, uint64_t pd_addr)
    998 {
    999 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
   1000 
   1001 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
   1002 
   1003 	/* wait for reg writes */
   1004 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
   1005 			       lower_32_bits(pd_addr), 0xffffffff);
   1006 }
   1007 
   1008 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
   1009 			       uint32_t reg, uint32_t val)
   1010 {
   1011 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
   1012 	amdgpu_ring_write(ring,	reg << 2);
   1013 	amdgpu_ring_write(ring, val);
   1014 }
   1015 
   1016 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
   1017 					struct amdgpu_irq_src *source,
   1018 					unsigned type,
   1019 					enum amdgpu_interrupt_state state)
   1020 {
   1021 	uint32_t val = 0;
   1022 
   1023 	if (!amdgpu_sriov_vf(adev)) {
   1024 		if (state == AMDGPU_IRQ_STATE_ENABLE)
   1025 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
   1026 
   1027 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
   1028 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
   1029 	}
   1030 	return 0;
   1031 }
   1032 
   1033 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
   1034 				      struct amdgpu_irq_src *source,
   1035 				      struct amdgpu_iv_entry *entry)
   1036 {
   1037 	DRM_DEBUG("IH: VCE\n");
   1038 
   1039 	switch (entry->src_data[0]) {
   1040 	case 0:
   1041 	case 1:
   1042 	case 2:
   1043 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
   1044 		break;
   1045 	default:
   1046 		DRM_ERROR("Unhandled interrupt: %d %d\n",
   1047 			  entry->src_id, entry->src_data[0]);
   1048 		break;
   1049 	}
   1050 
   1051 	return 0;
   1052 }
   1053 
   1054 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
   1055 	.name = "vce_v4_0",
   1056 	.early_init = vce_v4_0_early_init,
   1057 	.late_init = NULL,
   1058 	.sw_init = vce_v4_0_sw_init,
   1059 	.sw_fini = vce_v4_0_sw_fini,
   1060 	.hw_init = vce_v4_0_hw_init,
   1061 	.hw_fini = vce_v4_0_hw_fini,
   1062 	.suspend = vce_v4_0_suspend,
   1063 	.resume = vce_v4_0_resume,
   1064 	.is_idle = NULL /* vce_v4_0_is_idle */,
   1065 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
   1066 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
   1067 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
   1068 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
   1069 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
   1070 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
   1071 	.set_powergating_state = vce_v4_0_set_powergating_state,
   1072 };
   1073 
   1074 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
   1075 	.type = AMDGPU_RING_TYPE_VCE,
   1076 	.align_mask = 0x3f,
   1077 	.nop = VCE_CMD_NO_OP,
   1078 	.support_64bit_ptrs = false,
   1079 	.no_user_fence = true,
   1080 	.vmhub = AMDGPU_MMHUB_0,
   1081 	.get_rptr = vce_v4_0_ring_get_rptr,
   1082 	.get_wptr = vce_v4_0_ring_get_wptr,
   1083 	.set_wptr = vce_v4_0_ring_set_wptr,
   1084 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
   1085 	.emit_frame_size =
   1086 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
   1087 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
   1088 		4 + /* vce_v4_0_emit_vm_flush */
   1089 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
   1090 		1, /* vce_v4_0_ring_insert_end */
   1091 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
   1092 	.emit_ib = vce_v4_0_ring_emit_ib,
   1093 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
   1094 	.emit_fence = vce_v4_0_ring_emit_fence,
   1095 	.test_ring = amdgpu_vce_ring_test_ring,
   1096 	.test_ib = amdgpu_vce_ring_test_ib,
   1097 	.insert_nop = amdgpu_ring_insert_nop,
   1098 	.insert_end = vce_v4_0_ring_insert_end,
   1099 	.pad_ib = amdgpu_ring_generic_pad_ib,
   1100 	.begin_use = amdgpu_vce_ring_begin_use,
   1101 	.end_use = amdgpu_vce_ring_end_use,
   1102 	.emit_wreg = vce_v4_0_emit_wreg,
   1103 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
   1104 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
   1105 };
   1106 
   1107 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
   1108 {
   1109 	int i;
   1110 
   1111 	for (i = 0; i < adev->vce.num_rings; i++) {
   1112 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
   1113 		adev->vce.ring[i].me = i;
   1114 	}
   1115 	DRM_INFO("VCE enabled in VM mode\n");
   1116 }
   1117 
   1118 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
   1119 	.set = vce_v4_0_set_interrupt_state,
   1120 	.process = vce_v4_0_process_interrupt,
   1121 };
   1122 
   1123 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
   1124 {
   1125 	adev->vce.irq.num_types = 1;
   1126 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
    1127 }
   1128 
   1129 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
   1130 {
   1131 	.type = AMD_IP_BLOCK_TYPE_VCE,
   1132 	.major = 4,
   1133 	.minor = 0,
   1134 	.rev = 0,
   1135 	.funcs = &vce_v4_0_ip_funcs,
   1136 };
   1137