/* $NetBSD: kfd_device_queue_manager_vi.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $ */

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kfd_device_queue_manager_vi.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $");

#include "kfd_device_queue_manager.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "oss/oss_3_0_sh_mask.h"

static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size);
static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			enum cache_policy default_policy,
			enum cache_policy alternate_policy,
			void __user *alternate_aperture_base,
			uint64_t alternate_aperture_size);
static int update_qpd_vi(struct device_queue_manager *dqm,
			 struct qcm_process_device *qpd);
static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
			 struct qcm_process_device *qpd);
static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
			struct queue *q,
			struct qcm_process_device *qpd);

void device_queue_manager_init_vi(
	struct device_queue_manager_asic_ops *asic_ops)
{
	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
	asic_ops->update_qpd = update_qpd_vi;
	asic_ops->init_sdma_vm = init_sdma_vm;
	asic_ops->mqd_manager_init = mqd_manager_init_vi;
}

void device_queue_manager_init_vi_tonga(
	struct device_queue_manager_asic_ops *asic_ops)
{
	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
	asic_ops->update_qpd = update_qpd_vi_tonga;
	asic_ops->init_sdma_vm = init_sdma_vm_tonga;
	asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
}

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
	 * scratch and GPUVM apertures.
	 * The hardware fills in the remaining 59 bits according to the
	 * following pattern:
	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
	 *
	 * (where X/Y is the configurable nybble with the low-bit 0)
	 *
	 * LDS and scratch will have the same top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
	 * GPUVM can have a different top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
	 * We don't bother to support different top nybbles
	 * for LDS/Scratch and GPUVM.
	 */

	WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
		top_address_nybble == 0);

	return top_address_nybble << 12 |
		(top_address_nybble << 12) <<
		SH_MEM_BASES__SHARED_BASE__SHIFT;
}
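
/*
 * Worked example (illustrative only), assuming the gfx8 definition
 * SH_MEM_BASES__SHARED_BASE__SHIFT == 16 from gfx_8_0_sh_mask.h:
 * for a top nybble of 0x8 (low bit clear, nonzero, <= 0xE, so it
 * passes the WARN_ON above), the function returns
 *
 *	(0x8 << 12) | ((0x8 << 12) << 16) == 0x80008000
 *
 * i.e. PRIVATE_BASE = 0x8000 (LDS/scratch apertures at
 * 0x80000000'00000000) and SHARED_BASE = 0x8000 (GPUVM aperture with
 * the same top nybble), matching the aperture pattern described in
 * the comment above.
 */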

static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	uint32_t default_mtype;
	uint32_t ape1_mtype;

	default_mtype = (default_policy == cache_policy_coherent) ?
			MTYPE_CC :
			MTYPE_NC;

	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
			MTYPE_CC :
			MTYPE_NC;

	qpd->sh_mem_config = (qpd->sh_mem_config &
			SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
		SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
		default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
		ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
		SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	return true;
}

static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd,
		enum cache_policy default_policy,
		enum cache_policy alternate_policy,
		void __user *alternate_aperture_base,
		uint64_t alternate_aperture_size)
{
	uint32_t default_mtype;
	uint32_t ape1_mtype;

	default_mtype = (default_policy == cache_policy_coherent) ?
			MTYPE_UC :
			MTYPE_NC;

	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
			MTYPE_UC :
			MTYPE_NC;

	qpd->sh_mem_config =
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;

	return true;
}
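
/*
 * Note the difference between the two variants above: the plain VI
 * (APU) path maps cache_policy_coherent to MTYPE_CC, preserves the
 * existing ADDRESS_MODE field, and forces PRIVATE_ATC on, while the
 * Tonga (dGPU) path maps coherent to MTYPE_UC and rebuilds
 * sh_mem_config without any ATC bit, the ATC (IOMMUv2) translation
 * path being an APU-only feature.
 */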

static int update_qpd_vi(struct device_queue_manager *dqm,
			 struct qcm_process_device *qpd)
{
	struct kfd_process_device *pdd;
	unsigned int temp;

	pdd = qpd_to_pdd(qpd);

	/* check if sh_mem_config register already configured */
	if (qpd->sh_mem_config == 0) {
		qpd->sh_mem_config =
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

		qpd->sh_mem_ape1_limit = 0;
		qpd->sh_mem_ape1_base = 0;
	}

	if (qpd->pqm->process->is_32bit_user_mode) {
		temp = get_sh_mem_bases_32(pdd);
		qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
				SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
	} else {
		temp = get_sh_mem_bases_nybble_64(pdd);
		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
		qpd->sh_mem_config |= 1 <<
			SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
	}

	pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);

	return 0;
}

static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd)
{
	struct kfd_process_device *pdd;
	unsigned int temp;

	pdd = qpd_to_pdd(qpd);

	/* check if sh_mem_config register already configured */
	if (qpd->sh_mem_config == 0) {
		qpd->sh_mem_config =
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_UC <<
				SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			MTYPE_UC <<
				SH_MEM_CONFIG__APE1_MTYPE__SHIFT;

		qpd->sh_mem_ape1_limit = 0;
		qpd->sh_mem_ape1_base = 0;
	}

	/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
	 * aperture addresses.
	 */
	temp = get_sh_mem_bases_nybble_64(pdd);
	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);

	pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
		temp, qpd->sh_mem_bases);

	return 0;
}

static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);

	if (q->process->is_32bit_user_mode)
		value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
				get_sh_mem_bases_32(qpd_to_pdd(qpd));
	else
		value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;

	q->properties.sdma_vm_addr = value;
}

static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
			struct queue *q,
			struct qcm_process_device *qpd)
{
	/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
	 * aperture addresses.
	 */
	q->properties.sdma_vm_addr =
		((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
			SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
			SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
}
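
/*
 * For context, a minimal sketch of how these entry points are meant
 * to be consumed: kfd_device_queue_manager.c picks the per-ASIC ops
 * table when a device queue manager is created, roughly along the
 * lines of the (illustrative, abbreviated) switch below.
 *
 *	switch (dqm->dev->device_info->asic_family) {
 *	case CHIP_CARRIZO:
 *		device_queue_manager_init_vi(&dqm->asic_ops);
 *		break;
 *	case CHIP_TONGA:
 *	case CHIP_FIJI:
 *		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
 *		break;
 *	...
 *	}
 */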