/*	$NetBSD: kfd_crat.c,v 1.2 2021/12/18 23:44:59 riastradh Exp $	*/

/*
 * Copyright 2015-2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kfd_crat.c,v 1.2 2021/12/18 23:44:59 riastradh Exp $");

#include <linux/pci.h>
#include <linux/acpi.h>
#include "kfd_crat.h"
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"

/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
 * GPU processor IDs are expressed with Bit[31]=1.
 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
 * used in the CRAT.
 */
static uint32_t gpu_processor_id_low = 0x80001000;

/* Return the next available gpu_processor_id and increment it for the
 * next GPU
 *	@total_cu_count - Total CUs present in the GPU including ones
 *			  masked off
 */
static inline unsigned int get_and_inc_gpu_processor_id(
				unsigned int total_cu_count)
{
        int current_id = gpu_processor_id_low;

        gpu_processor_id_low += total_cu_count;
        return current_id;
}
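
/* Illustrative example (not from the original source): with the base of
 * 0x80001000, the first dGPU reporting 64 CUs is assigned processor IDs
 * 0x80001000..0x8000103f, and the next GPU starts at 0x80001040. IDs are
 * reserved for all CUs, including masked-off ones, so the ranges of
 * successive GPUs never overlap.
 */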

/* Static table to describe GPU Cache information */
struct kfd_gpu_cache_info {
        uint32_t        cache_size;
        uint32_t        cache_level;
        uint32_t        flags;
        /* Indicates how many Compute Units share this cache
         * Value = 1 indicates the cache is not shared
         */
        uint32_t        num_cu_shared;
};

static struct kfd_gpu_cache_info kaveri_cache_info[] = {
        {
                /* TCP L1 Cache per CU */
                .cache_size = 16,
                .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_DATA_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
                .num_cu_shared = 1,
        },
        {
                /* Scalar L1 Instruction Cache (in SQC module) per bank */
                .cache_size = 16,
                .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_INST_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
                .num_cu_shared = 2,
        },
        {
                /* Scalar L1 Data Cache (in SQC module) per bank */
                .cache_size = 8,
                .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_DATA_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
                .num_cu_shared = 2,
        },

        /* TODO: Add L2 Cache information */
};


static struct kfd_gpu_cache_info carrizo_cache_info[] = {
        {
                /* TCP L1 Cache per CU */
                .cache_size = 16,
                .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_DATA_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
                .num_cu_shared = 1,
        },
        {
                /* Scalar L1 Instruction Cache (in SQC module) per bank */
                .cache_size = 8,
                .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_INST_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
                .num_cu_shared = 4,
        },
        {
                /* Scalar L1 Data Cache (in SQC module) per bank */
                .cache_size = 4,
                .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_DATA_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
                .num_cu_shared = 4,
        },

        /* TODO: Add L2 Cache information */
};

/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
 * the following ASICs may need a separate table.
 */
#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info
#define renoir_cache_info carrizo_cache_info
/* TODO - check & update Navi10 cache details */
#define navi10_cache_info carrizo_cache_info

static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
                struct crat_subtype_computeunit *cu)
{
        dev->node_props.cpu_cores_count = cu->num_cpu_cores;
        dev->node_props.cpu_core_id_base = cu->processor_id_low;
        if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
                dev->node_props.capability |= HSA_CAP_ATS_PRESENT;

        pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
                        cu->processor_id_low);
}

static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
                struct crat_subtype_computeunit *cu)
{
        dev->node_props.simd_id_base = cu->processor_id_low;
        dev->node_props.simd_count = cu->num_simd_cores;
        dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
        dev->node_props.max_waves_per_simd = cu->max_waves_simd;
        dev->node_props.wave_front_size = cu->wave_front_size;
        dev->node_props.array_count = cu->array_count;
        dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
        dev->node_props.simd_per_cu = cu->num_simd_per_cu;
        dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
        if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
                dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
        pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
}

/* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
 * topology device present in the device_list
 */
static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
                                struct list_head *device_list)
{
        struct kfd_topology_device *dev;

        pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
                        cu->proximity_domain, cu->hsa_capability);
        list_for_each_entry(dev, device_list, list) {
                if (cu->proximity_domain == dev->proximity_domain) {
                        if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
                                kfd_populated_cu_info_cpu(dev, cu);

                        if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
                                kfd_populated_cu_info_gpu(dev, cu);
                        break;
                }
        }

        return 0;
}

static struct kfd_mem_properties *
find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
                struct kfd_topology_device *dev)
{
        struct kfd_mem_properties *props;

        list_for_each_entry(props, &dev->mem_props, list) {
                if (props->heap_type == heap_type
                                && props->flags == flags
                                && props->width == width)
                        return props;
        }

        return NULL;
}
/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
 * topology device present in the device_list
 */
static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
                                struct list_head *device_list)
{
        struct kfd_mem_properties *props;
        struct kfd_topology_device *dev;
        uint32_t heap_type;
        uint64_t size_in_bytes;
        uint32_t flags = 0;
        uint32_t width;

        pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
                        mem->proximity_domain);
        list_for_each_entry(dev, device_list, list) {
                if (mem->proximity_domain == dev->proximity_domain) {
                        /* We're on GPU node */
                        if (dev->node_props.cpu_cores_count == 0) {
                                /* APU */
                                if (mem->visibility_type == 0)
                                        heap_type =
                                                HSA_MEM_HEAP_TYPE_FB_PRIVATE;
                                /* dGPU */
                                else
                                        heap_type = mem->visibility_type;
                        } else
                                heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;

                        if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
                                flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
                        if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
                                flags |= HSA_MEM_FLAGS_NON_VOLATILE;

                        size_in_bytes =
                                ((uint64_t)mem->length_high << 32) +
                                                        mem->length_low;
                        width = mem->width;

                        /* Multiple banks of the same type are aggregated into
                         * one. User mode doesn't care about multiple physical
                         * memory segments. It's managed as a single virtual
                         * heap for user mode.
                         */
                        props = find_subtype_mem(heap_type, flags, width, dev);
                        if (props) {
                                props->size_in_bytes += size_in_bytes;
                                break;
                        }

                        props = kfd_alloc_struct(props);
                        if (!props)
                                return -ENOMEM;

                        props->heap_type = heap_type;
                        props->flags = flags;
                        props->size_in_bytes = size_in_bytes;
                        props->width = width;

                        dev->node_props.mem_banks_count++;
                        list_add_tail(&props->list, &dev->mem_props);

                        break;
                }
        }

        return 0;
}
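
/* Illustrative note (not from the original source): the bank length is
 * carried as two 32-bit halves. For a 4 GiB bank, length_high = 0x1 and
 * length_low = 0x0, so size_in_bytes = (0x1ULL << 32) + 0x0 = 0x100000000.
 */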

/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
 * topology device present in the device_list
 */
static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
                        struct list_head *device_list)
{
        struct kfd_cache_properties *props;
        struct kfd_topology_device *dev;
        uint32_t id;
        uint32_t total_num_of_cu;

        id = cache->processor_id_low;

        pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
        list_for_each_entry(dev, device_list, list) {
                total_num_of_cu = (dev->node_props.array_count *
                                        dev->node_props.cu_per_simd_array);

                /* Cache information in CRAT doesn't have proximity_domain
                 * information as it is associated with a CPU core or GPU
                 * Compute Unit. So map the cache using CPU core Id or SIMD
                 * (GPU) ID.
                 * TODO: This works because currently we can safely assume that
                 * Compute Units are parsed before caches are parsed. In
                 * future, remove this dependency
                 */
                if ((id >= dev->node_props.cpu_core_id_base &&
                        id <= dev->node_props.cpu_core_id_base +
                                dev->node_props.cpu_cores_count) ||
                        (id >= dev->node_props.simd_id_base &&
                        id < dev->node_props.simd_id_base +
                                total_num_of_cu)) {
                        props = kfd_alloc_struct(props);
                        if (!props)
                                return -ENOMEM;

                        props->processor_id_low = id;
                        props->cache_level = cache->cache_level;
                        props->cache_size = cache->cache_size;
                        props->cacheline_size = cache->cache_line_size;
                        props->cachelines_per_tag = cache->lines_per_tag;
                        props->cache_assoc = cache->associativity;
                        props->cache_latency = cache->cache_latency;
                        memcpy(props->sibling_map, cache->sibling_map,
                                        sizeof(props->sibling_map));

                        if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
                                props->cache_type |= HSA_CACHE_TYPE_DATA;
                        if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
                                props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
                        if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
                                props->cache_type |= HSA_CACHE_TYPE_CPU;
                        if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
                                props->cache_type |= HSA_CACHE_TYPE_HSACU;

                        dev->cache_count++;
                        dev->node_props.caches_count++;
                        list_add_tail(&props->list, &dev->cache_props);

                        break;
                }
        }

        return 0;
}
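
/* Illustrative example (not from the original source): if a node has
 * cpu_core_id_base = 0x10 with 8 CPU cores and simd_id_base = 0x80001000
 * with 64 CUs, a cache entry with processor_id_low = 0x80001005 falls in
 * the SIMD ID range [0x80001000, 0x80001040) and is attached to that node.
 */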

/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
 * topology device present in the device_list
 */
static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
                                        struct list_head *device_list)
{
        struct kfd_iolink_properties *props = NULL, *props2;
        struct kfd_topology_device *dev, *to_dev;
        uint32_t id_from;
        uint32_t id_to;

        id_from = iolink->proximity_domain_from;
        id_to = iolink->proximity_domain_to;

        pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
                        id_from, id_to);
        list_for_each_entry(dev, device_list, list) {
                if (id_from == dev->proximity_domain) {
                        props = kfd_alloc_struct(props);
                        if (!props)
                                return -ENOMEM;

                        props->node_from = id_from;
                        props->node_to = id_to;
                        props->ver_maj = iolink->version_major;
                        props->ver_min = iolink->version_minor;
                        props->iolink_type = iolink->io_interface_type;

                        if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
                                props->weight = 20;
                        else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
                                props->weight = 15 * iolink->num_hops_xgmi;
                        else
                                props->weight = node_distance(id_from, id_to);

                        props->min_latency = iolink->minimum_latency;
                        props->max_latency = iolink->maximum_latency;
                        props->min_bandwidth = iolink->minimum_bandwidth_mbs;
                        props->max_bandwidth = iolink->maximum_bandwidth_mbs;
                        props->rec_transfer_size =
                                        iolink->recommended_transfer_size;

                        dev->io_link_count++;
                        dev->node_props.io_links_count++;
                        list_add_tail(&props->list, &dev->io_link_props);
                        break;
                }
        }

        /* CPU topology is created before GPUs are detected, so CPU->GPU
         * links are not built at that time. If a PCIe type is discovered, it
         * means a GPU is detected and we are adding GPU->CPU to the topology.
         * At this time, also add the corresponding CPU->GPU link if GPU
         * is large bar.
         * For xGMI, we only added the link with one direction in the crat
         * table, add the corresponding reversed direction link now.
         */
        if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
                to_dev = kfd_topology_device_by_proximity_domain(id_to);
                if (!to_dev)
                        return -ENODEV;
                /* same everything but the other direction */
                props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
                if (!props2)
                        return -ENOMEM;
                props2->node_from = id_to;
                props2->node_to = id_from;
                props2->kobj = NULL;
                to_dev->io_link_count++;
                to_dev->node_props.io_links_count++;
                list_add_tail(&props2->list, &to_dev->io_link_props);
        }

        return 0;
}
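
/* Note (added for clarity, not in the original source): the weight set
 * above acts as a relative distance cost for ranking links, lower meaning
 * closer. E.g. a one-hop xGMI link gets weight 15 and ranks closer than a
 * PCIe link (20), while a two-hop xGMI link (30) ranks farther than PCIe.
 */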

/* kfd_parse_subtype - parse subtypes and attach it to correct topology device
 * present in the device_list
 *	@sub_type_hdr - subtype section of crat_image
 *	@device_list - list of topology devices present in this crat_image
 */
static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
                                struct list_head *device_list)
{
        struct crat_subtype_computeunit *cu;
        struct crat_subtype_memory *mem;
        struct crat_subtype_cache *cache;
        struct crat_subtype_iolink *iolink;
        int ret = 0;

        switch (sub_type_hdr->type) {
        case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
                cu = (struct crat_subtype_computeunit *)sub_type_hdr;
                ret = kfd_parse_subtype_cu(cu, device_list);
                break;
        case CRAT_SUBTYPE_MEMORY_AFFINITY:
                mem = (struct crat_subtype_memory *)sub_type_hdr;
                ret = kfd_parse_subtype_mem(mem, device_list);
                break;
        case CRAT_SUBTYPE_CACHE_AFFINITY:
                cache = (struct crat_subtype_cache *)sub_type_hdr;
                ret = kfd_parse_subtype_cache(cache, device_list);
                break;
        case CRAT_SUBTYPE_TLB_AFFINITY:
                /*
                 * For now, nothing to do here
                 */
                pr_debug("Found TLB entry in CRAT table (not processing)\n");
                break;
        case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
                /*
                 * For now, nothing to do here
                 */
                pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
                break;
        case CRAT_SUBTYPE_IOLINK_AFFINITY:
                iolink = (struct crat_subtype_iolink *)sub_type_hdr;
                ret = kfd_parse_subtype_iolink(iolink, device_list);
                break;
        default:
                pr_warn("Unknown subtype %d in CRAT\n",
                                sub_type_hdr->type);
        }

        return ret;
}

/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
 * create a kfd_topology_device and add in to device_list. Also parse
 * CRAT subtypes and attach it to appropriate kfd_topology_device
 *	@crat_image - input image containing CRAT
 *	@device_list - [OUT] list of kfd_topology_device generated after
 *		       parsing crat_image
 *	@proximity_domain - Proximity domain of the first device in the table
 *
 * Return - 0 if successful else -ve value
 */
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
                         uint32_t proximity_domain)
{
        struct kfd_topology_device *top_dev = NULL;
        struct crat_subtype_generic *sub_type_hdr;
        uint16_t node_id;
        int ret = 0;
        struct crat_header *crat_table = (struct crat_header *)crat_image;
        uint16_t num_nodes;
        uint32_t image_len;

        if (!crat_image)
                return -EINVAL;

        if (!list_empty(device_list)) {
                pr_warn("Error device list should be empty\n");
                return -EINVAL;
        }

        num_nodes = crat_table->num_domains;
        image_len = crat_table->length;

        pr_info("Parsing CRAT table with %d nodes\n", num_nodes);

        for (node_id = 0; node_id < num_nodes; node_id++) {
                top_dev = kfd_create_topology_device(device_list);
                if (!top_dev)
                        break;
                top_dev->proximity_domain = proximity_domain++;
        }

        if (!top_dev) {
                ret = -ENOMEM;
                goto err;
        }

        memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
        memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
                        CRAT_OEMTABLEID_LENGTH);
        top_dev->oem_revision = crat_table->oem_revision;

        sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
        while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
                        ((char *)crat_image) + image_len) {
                if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
                        ret = kfd_parse_subtype(sub_type_hdr, device_list);
                        if (ret)
                                break;
                }

                sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
                                sub_type_hdr->length);
        }

err:
        if (ret)
                kfd_release_topology_device_list(device_list);

        return ret;
}

/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
static int fill_in_pcache(struct crat_subtype_cache *pcache,
                                struct kfd_gpu_cache_info *pcache_info,
                                struct kfd_cu_info *cu_info,
                                int mem_available,
                                int cu_bitmask,
                                int cache_type, unsigned int cu_processor_id,
                                int cu_block)
{
        unsigned int cu_sibling_map_mask;
        int first_active_cu;

        /* First check if enough memory is available */
        if (sizeof(struct crat_subtype_cache) > mem_available)
                return -ENOMEM;

        cu_sibling_map_mask = cu_bitmask;
        cu_sibling_map_mask >>= cu_block;
        cu_sibling_map_mask &=
                ((1 << pcache_info[cache_type].num_cu_shared) - 1);
        first_active_cu = ffs(cu_sibling_map_mask);

        /* CU could be inactive. In case of a shared cache, find the first
         * active CU; in case of a non-shared cache, check if the CU is
         * inactive and, if so, skip it.
         */
        if (first_active_cu) {
                memset(pcache, 0, sizeof(struct crat_subtype_cache));
                pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
                pcache->length = sizeof(struct crat_subtype_cache);
                pcache->flags = pcache_info[cache_type].flags;
                pcache->processor_id_low = cu_processor_id
                                         + (first_active_cu - 1);
                pcache->cache_level = pcache_info[cache_type].cache_level;
                pcache->cache_size = pcache_info[cache_type].cache_size;

                /* Sibling map is w.r.t processor_id_low, so shift out
                 * inactive CU
                 */
                cu_sibling_map_mask =
                        cu_sibling_map_mask >> (first_active_cu - 1);

                pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
                pcache->sibling_map[1] =
                                (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
                pcache->sibling_map[2] =
                                (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
                pcache->sibling_map[3] =
                                (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
                return 0;
        }
        return 1;
}
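
/* Worked example (added for illustration, not in the original source):
 * with num_cu_shared = 4, cu_block = 4 and cu_bitmask = 0xE0, the mask
 * becomes (0xE0 >> 4) & 0xF = 0xE. ffs(0xE) = 2, so the first CU of the
 * block is inactive, processor_id_low is cu_processor_id + 1, and the
 * sibling map is re-based to 0x7 (the three active CUs sharing the cache).
 */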

/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
 * tables
 *
 *	@kdev - [IN] GPU device
 *	@gpu_processor_id - [IN] GPU processor ID to which these caches
 *			    associate
 *	@available_size - [IN] Amount of memory available in pcache
 *	@cu_info - [IN] Compute Unit info obtained from KGD
 *	@pcache - [OUT] memory into which cache data is to be filled in.
 *	@size_filled - [OUT] amount of data used up in pcache.
 *	@num_of_entries - [OUT] number of caches added
 */
static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
                        int gpu_processor_id,
                        int available_size,
                        struct kfd_cu_info *cu_info,
                        struct crat_subtype_cache *pcache,
                        int *size_filled,
                        int *num_of_entries)
{
        struct kfd_gpu_cache_info *pcache_info;
        int num_of_cache_types = 0;
        int i, j, k;
        int ct = 0;
        int mem_available = available_size;
        unsigned int cu_processor_id;
        int ret;

        switch (kdev->device_info->asic_family) {
        case CHIP_KAVERI:
                pcache_info = kaveri_cache_info;
                num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
                break;
        case CHIP_HAWAII:
                pcache_info = hawaii_cache_info;
                num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
                break;
        case CHIP_CARRIZO:
                pcache_info = carrizo_cache_info;
                num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
                break;
        case CHIP_TONGA:
                pcache_info = tonga_cache_info;
                num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
                break;
        case CHIP_FIJI:
                pcache_info = fiji_cache_info;
                num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
                break;
        case CHIP_POLARIS10:
                pcache_info = polaris10_cache_info;
                num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
                break;
        case CHIP_POLARIS11:
                pcache_info = polaris11_cache_info;
                num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
                break;
        case CHIP_POLARIS12:
                pcache_info = polaris12_cache_info;
                num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
                break;
        case CHIP_VEGAM:
                pcache_info = vegam_cache_info;
                num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
                break;
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_VEGA20:
        case CHIP_ARCTURUS:
                pcache_info = vega10_cache_info;
                num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
                break;
        case CHIP_RAVEN:
                pcache_info = raven_cache_info;
                num_of_cache_types = ARRAY_SIZE(raven_cache_info);
                break;
        case CHIP_RENOIR:
                pcache_info = renoir_cache_info;
                num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
                break;
        case CHIP_NAVI10:
        case CHIP_NAVI12:
        case CHIP_NAVI14:
                pcache_info = navi10_cache_info;
                num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
                break;
        default:
                return -EINVAL;
        }

        *size_filled = 0;
        *num_of_entries = 0;

        /* For each type of cache listed in the kfd_gpu_cache_info table,
         * go through all available Compute Units.
         * The [i,j,k] loop:
         *	if kfd_gpu_cache_info.num_cu_shared = 1, parses through
         *	all available CUs;
         *	if kfd_gpu_cache_info.num_cu_shared != 1, considers only
         *	one CU from each shared unit.
         */
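
        /* Illustrative example (not from the original source): for a cache
         * type with num_cu_shared = 4 on a shader array with 10 CUs per SH,
         * k steps through 0, 4 and 8, emitting at most one cache entry per
         * 4-CU block while cu_processor_id advances by 4 each step.
         */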

        for (ct = 0; ct < num_of_cache_types; ct++) {
                cu_processor_id = gpu_processor_id;
                for (i = 0; i < cu_info->num_shader_engines; i++) {
                        for (j = 0; j < cu_info->num_shader_arrays_per_engine;
                                j++) {
                                for (k = 0; k < cu_info->num_cu_per_sh;
                                        k += pcache_info[ct].num_cu_shared) {

                                        ret = fill_in_pcache(pcache,
                                                pcache_info,
                                                cu_info,
                                                mem_available,
                                                cu_info->cu_bitmap[i % 4][j + i / 4],
                                                ct,
                                                cu_processor_id,
                                                k);

                                        if (ret < 0)
                                                break;

                                        if (!ret) {
                                                pcache++;
                                                (*num_of_entries)++;
                                                mem_available -=
                                                        sizeof(*pcache);
                                                (*size_filled) +=
                                                        sizeof(*pcache);
                                        }

                                        /* Move to next CU block */
                                        cu_processor_id +=
                                                pcache_info[ct].num_cu_shared;
                                }
                        }
                }
        }

        pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);

        return 0;
}

/*
 * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
 * copies CRAT from ACPI (if available).
 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
 *
 *	@crat_image: CRAT read from ACPI. If no CRAT in ACPI then
 *		     crat_image will be NULL
 *	@size: [OUT] size of crat_image
 *
 *	Return 0 if successful else return error code
 */
int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
{
        struct acpi_table_header *crat_table;
        acpi_status status;
        void *pcrat_image;

        if (!crat_image)
                return -EINVAL;

        *crat_image = NULL;

        /* Fetch the CRAT table from ACPI */
        status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
        if (status == AE_NOT_FOUND) {
                pr_warn("CRAT table not found\n");
                return -ENODATA;
        } else if (ACPI_FAILURE(status)) {
                const char *err = acpi_format_exception(status);

                pr_err("CRAT table error: %s\n", err);
                return -EINVAL;
        }

        if (ignore_crat) {
                pr_info("CRAT table disabled by module option\n");
                return -ENODATA;
        }

        pcrat_image = kmemdup(crat_table, crat_table->length, GFP_KERNEL);
        if (!pcrat_image)
                return -ENOMEM;

        *crat_image = pcrat_image;
        *size = crat_table->length;

        return 0;
}

/* Memory required to create Virtual CRAT.
 * Since there is no easy way to predict the amount of memory required, the
 * following amounts are allocated for CPU and GPU Virtual CRAT. This is
 * expected to cover all known conditions. But to be safe, an additional
 * check is put in the code to ensure we don't overwrite.
 */
#define VCRAT_SIZE_FOR_CPU	(2 * PAGE_SIZE)
#define VCRAT_SIZE_FOR_GPU	(4 * PAGE_SIZE)
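
/* Sizing note (added for illustration, not in the original source): with a
 * typical 4 KiB PAGE_SIZE this reserves 8 KiB for a CPU VCRAT and 16 KiB
 * for a GPU VCRAT; every fill helper below decrements avail_size and bails
 * out with -ENOMEM before writing past that allocation.
 */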

/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
 *
 *	@numa_node_id: CPU NUMA node id
 *	@avail_size: Available size in the memory
 *	@proximity_domain: Proximity domain to report for this node
 *	@sub_type_hdr: Memory into which compute info will be filled in
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
                                int proximity_domain,
                                struct crat_subtype_computeunit *sub_type_hdr)
{
        const struct cpumask *cpumask;

        *avail_size -= sizeof(struct crat_subtype_computeunit);
        if (*avail_size < 0)
                return -ENOMEM;

        memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));

        /* Fill in subtype header data */
        sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
        sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
        sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

        cpumask = cpumask_of_node(numa_node_id);

        /* Fill in CU data */
        sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
        sub_type_hdr->proximity_domain = proximity_domain;
        sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
        if (sub_type_hdr->processor_id_low == -1)
                return -EINVAL;

        sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);

        return 0;
}

/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
 *
 *	@numa_node_id: CPU NUMA node id
 *	@avail_size: Available size in the memory
 *	@proximity_domain: Proximity domain to report for this node
 *	@sub_type_hdr: Memory into which memory info will be filled in
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
                        int proximity_domain,
                        struct crat_subtype_memory *sub_type_hdr)
{
        uint64_t mem_in_bytes = 0;
        pg_data_t *pgdat;
        int zone_type;

        *avail_size -= sizeof(struct crat_subtype_memory);
        if (*avail_size < 0)
                return -ENOMEM;

        memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));

        /* Fill in subtype header data */
        sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
        sub_type_hdr->length = sizeof(struct crat_subtype_memory);
        sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

        /* Fill in Memory Subunit data */

        /* Unlike si_meminfo, si_meminfo_node is not exported. So
         * the following lines are duplicated from si_meminfo_node
         * function
         */
        pgdat = NODE_DATA(numa_node_id);
        for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
                mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
        mem_in_bytes <<= PAGE_SHIFT;

        sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
        sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
        sub_type_hdr->proximity_domain = proximity_domain;

        return 0;
}
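
/* Worked example (added for illustration, not in the original source): a
 * node whose zones manage 4,194,304 pages reports 4,194,304 << 12 = 16 GiB
 * (assuming 4 KiB pages), split as length_high = 0x4 and length_low = 0x0
 * for the 64-bit byte count 0x4_0000_0000.
 */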

#ifdef CONFIG_X86_64
static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
                                uint32_t *num_entries,
                                struct crat_subtype_iolink *sub_type_hdr)
{
        int nid;
        struct cpuinfo_x86 *c = &cpu_data(0);
        uint8_t link_type;

        if (c->x86_vendor == X86_VENDOR_AMD)
                link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
        else
                link_type = CRAT_IOLINK_TYPE_QPI_1_1;

        *num_entries = 0;

        /* Create IO links from this node to other CPU nodes */
        for_each_online_node(nid) {
                if (nid == numa_node_id) /* node itself */
                        continue;

                *avail_size -= sizeof(struct crat_subtype_iolink);
                if (*avail_size < 0)
                        return -ENOMEM;

                memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

                /* Fill in subtype header data */
                sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
                sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
                sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

                /* Fill in IO link data */
                sub_type_hdr->proximity_domain_from = numa_node_id;
                sub_type_hdr->proximity_domain_to = nid;
                sub_type_hdr->io_interface_type = link_type;

                (*num_entries)++;
                sub_type_hdr++;
        }

        return 0;
}
#endif
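
/* Illustrative note (not from the original source): on a system with four
 * online NUMA nodes, each call emits three iolink entries (every online
 * node except the node itself), so a full CPU VCRAT would carry
 * 4 * 3 = 12 CPU-to-CPU links, assuming every node is CPU-bearing.
 */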

/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
 *
 *	@pcrat_image: Fill in VCRAT for CPU
 *	@size:	[IN] allocated size of crat_image.
 *		[OUT] actual size of data filled in crat_image
 */
static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
{
        struct crat_header *crat_table = (struct crat_header *)pcrat_image;
        struct acpi_table_header *acpi_table;
        acpi_status status;
        struct crat_subtype_generic *sub_type_hdr;
        int avail_size = *size;
        int numa_node_id;
#ifdef CONFIG_X86_64
        uint32_t entries = 0;
#endif
        int ret = 0;

        if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
                return -EINVAL;

        /* Fill in CRAT Header.
         * Modify length and total_entries as subunits are added.
         */
        avail_size -= sizeof(struct crat_header);
        if (avail_size < 0)
                return -ENOMEM;

        memset(crat_table, 0, sizeof(struct crat_header));
        memcpy(&crat_table->signature, CRAT_SIGNATURE,
                        sizeof(crat_table->signature));
        crat_table->length = sizeof(struct crat_header);

        status = acpi_get_table("DSDT", 0, &acpi_table);
        if (status != AE_OK)
                pr_warn("DSDT table not found for OEM information\n");
        else {
                crat_table->oem_revision = acpi_table->revision;
                memcpy(crat_table->oem_id, acpi_table->oem_id,
                                CRAT_OEMID_LENGTH);
                memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
                                CRAT_OEMTABLEID_LENGTH);
        }
        crat_table->total_entries = 0;
        crat_table->num_domains = 0;

        sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);

        for_each_online_node(numa_node_id) {
                if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
                        continue;

                /* Fill in Subtype: Compute Unit */
                ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
                        crat_table->num_domains,
                        (struct crat_subtype_computeunit *)sub_type_hdr);
                if (ret < 0)
                        return ret;
                crat_table->length += sub_type_hdr->length;
                crat_table->total_entries++;

                sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
                                sub_type_hdr->length);

                /* Fill in Subtype: Memory */
                ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
                        crat_table->num_domains,
                        (struct crat_subtype_memory *)sub_type_hdr);
                if (ret < 0)
                        return ret;
                crat_table->length += sub_type_hdr->length;
                crat_table->total_entries++;

                sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
                                sub_type_hdr->length);

                /* Fill in Subtype: IO Link */
#ifdef CONFIG_X86_64
                ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
                                &entries,
                                (struct crat_subtype_iolink *)sub_type_hdr);
                if (ret < 0)
                        return ret;
                crat_table->length += (sub_type_hdr->length * entries);
                crat_table->total_entries += entries;

                sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
                                sub_type_hdr->length * entries);
#else
                pr_info("IO link not available for non x86 platforms\n");
#endif

                crat_table->num_domains++;
        }

        /* TODO: Add cache Subtype for CPU.
         * Currently, CPU cache information is available in function
         * detect_cache_attributes(cpu) defined in the file
         * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
         * exported and to get the same information the code needs to be
         * duplicated.
         */

        *size = crat_table->length;
        pr_info("Virtual CRAT table created for CPU\n");

        return 0;
}

static int kfd_fill_gpu_memory_affinity(int *avail_size,
                struct kfd_dev *kdev, uint8_t type, uint64_t size,
                struct crat_subtype_memory *sub_type_hdr,
                uint32_t proximity_domain,
                const struct kfd_local_mem_info *local_mem_info)
{
        *avail_size -= sizeof(struct crat_subtype_memory);
        if (*avail_size < 0)
                return -ENOMEM;

        memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
        sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
        sub_type_hdr->length = sizeof(struct crat_subtype_memory);
        sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;

        sub_type_hdr->proximity_domain = proximity_domain;

        pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
                        type, size);

        sub_type_hdr->length_low = lower_32_bits(size);
        sub_type_hdr->length_high = upper_32_bits(size);

        sub_type_hdr->width = local_mem_info->vram_width;
        sub_type_hdr->visibility_type = type;

        return 0;
}

/* kfd_fill_gpu_direct_io_link_to_cpu - Fill in direct io link from GPU
 * to its NUMA node
 *	@avail_size: Available size in the memory
 *	@kdev - [IN] GPU device
 *	@sub_type_hdr: Memory into which io link info will be filled in
 *	@proximity_domain - proximity domain of the GPU node
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
                        struct kfd_dev *kdev,
                        struct crat_subtype_iolink *sub_type_hdr,
                        uint32_t proximity_domain)
{
        *avail_size -= sizeof(struct crat_subtype_iolink);
        if (*avail_size < 0)
                return -ENOMEM;

        memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

        /* Fill in subtype header data */
        sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
        sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
        sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
        if (kfd_dev_is_large_bar(kdev))
                sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

        /* Fill in IOLINK subtype.
         * TODO: Fill-in other fields of iolink subtype
         */
        sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
        sub_type_hdr->proximity_domain_from = proximity_domain;
#ifdef CONFIG_NUMA
        if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
                sub_type_hdr->proximity_domain_to = 0;
        else
                sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
#else
        sub_type_hdr->proximity_domain_to = 0;
#endif
        return 0;
}

static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
                        struct kfd_dev *kdev,
                        struct kfd_dev *peer_kdev,
                        struct crat_subtype_iolink *sub_type_hdr,
                        uint32_t proximity_domain_from,
                        uint32_t proximity_domain_to)
{
        *avail_size -= sizeof(struct crat_subtype_iolink);
        if (*avail_size < 0)
                return -ENOMEM;

        memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

        sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
        sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
        sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
                               CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

        sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
        sub_type_hdr->proximity_domain_from = proximity_domain_from;
        sub_type_hdr->proximity_domain_to = proximity_domain_to;
        sub_type_hdr->num_hops_xgmi =
                amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
        return 0;
}

/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
 *
 *	@pcrat_image: Fill in VCRAT for GPU
 *	@size:	[IN] allocated size of crat_image.
 *		[OUT] actual size of data filled in crat_image
 */
static int kfd_create_vcrat_image_gpu(void *pcrat_image,
                                      size_t *size, struct kfd_dev *kdev,
                                      uint32_t proximity_domain)
{
        struct crat_header *crat_table = (struct crat_header *)pcrat_image;
        struct crat_subtype_generic *sub_type_hdr;
        struct kfd_local_mem_info local_mem_info;
        struct kfd_topology_device *peer_dev;
        struct crat_subtype_computeunit *cu;
        struct kfd_cu_info cu_info;
        int avail_size = *size;
        uint32_t total_num_of_cu;
        int num_of_cache_entries = 0;
        int cache_mem_filled = 0;
        uint32_t nid = 0;
        int ret = 0;

        if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
                return -EINVAL;

        /* Fill the CRAT Header.
         * Modify length and total_entries as subunits are added.
         */
        avail_size -= sizeof(struct crat_header);
        if (avail_size < 0)
                return -ENOMEM;

        memset(crat_table, 0, sizeof(struct crat_header));

        memcpy(&crat_table->signature, CRAT_SIGNATURE,
                        sizeof(crat_table->signature));
        /* Change length as we add more subtypes*/
        crat_table->length = sizeof(struct crat_header);
        crat_table->num_domains = 1;
        crat_table->total_entries = 0;

        /* Fill in Subtype: Compute Unit
         * First fill in the sub type header and then sub type data
         */
        avail_size -= sizeof(struct crat_subtype_computeunit);
        if (avail_size < 0)
                return -ENOMEM;

        sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
        memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));

        sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
        sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
        sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

        /* Fill CU subtype data */
        cu = (struct crat_subtype_computeunit *)sub_type_hdr;
        cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
        cu->proximity_domain = proximity_domain;

        amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info);
        cu->num_simd_per_cu = cu_info.simd_per_cu;
        cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
        cu->max_waves_simd = cu_info.max_waves_per_simd;

        cu->wave_front_size = cu_info.wave_front_size;
        cu->array_count = cu_info.num_shader_arrays_per_engine *
                cu_info.num_shader_engines;
        total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
        cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
        cu->num_cu_per_array = cu_info.num_cu_per_sh;
        cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
        cu->num_banks = cu_info.num_shader_engines;
        cu->lds_size_in_kb = cu_info.lds_size;

        cu->hsa_capability = 0;

        /* Check if this node supports IOMMU. During parsing this flag will
         * translate to HSA_CAP_ATS_PRESENT
         */
        if (!kfd_iommu_check_device(kdev))
                cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;

        crat_table->length += sub_type_hdr->length;
        crat_table->total_entries++;

        /* Fill in Subtype: Memory. Only on systems with large BAR (no
         * private FB), report memory as public. On other systems
         * report the total FB size (public+private) as a single
         * private heap.
         */
        amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info);
        sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
                        sub_type_hdr->length);

        if (debug_largebar)
                local_mem_info.local_mem_size_private = 0;

        if (local_mem_info.local_mem_size_private == 0)
                ret = kfd_fill_gpu_memory_affinity(&avail_size,
                                kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
                                local_mem_info.local_mem_size_public,
                                (struct crat_subtype_memory *)sub_type_hdr,
                                proximity_domain,
                                &local_mem_info);
        else
                ret = kfd_fill_gpu_memory_affinity(&avail_size,
                                kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
                                local_mem_info.local_mem_size_public +
                                local_mem_info.local_mem_size_private,
                                (struct crat_subtype_memory *)sub_type_hdr,
                                proximity_domain,
                                &local_mem_info);
        if (ret < 0)
                return ret;

        crat_table->length += sizeof(struct crat_subtype_memory);
        crat_table->total_entries++;

        /* TODO: Fill in cache information. This information is NOT readily
         * available in KGD
         */
        sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
                        sub_type_hdr->length);
        ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
                                avail_size,
                                &cu_info,
                                (struct crat_subtype_cache *)sub_type_hdr,
                                &cache_mem_filled,
                                &num_of_cache_entries);

        if (ret < 0)
                return ret;

        crat_table->length += cache_mem_filled;
        crat_table->total_entries += num_of_cache_entries;
        avail_size -= cache_mem_filled;

        /* Fill in Subtype: IO_LINKS
         * Only direct links are added here, i.e. the link from the GPU
         * to its NUMA node. Indirect links are added by userspace.
         */
        sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
                cache_mem_filled);
        ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
                (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);

        if (ret < 0)
                return ret;

        crat_table->length += sub_type_hdr->length;
        crat_table->total_entries++;
	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			cache_mem_filled);
	ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
		(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);

	if (ret < 0)
		return ret;

	crat_table->length += sub_type_hdr->length;
	crat_table->total_entries++;


	/* Fill in Subtype: IO_LINKS
	 * Direct links from this GPU to other GPUs through xGMI.
	 * We loop over the GPUs that have already been processed (those
	 * with a lower proximity_domain value) and add a link for every
	 * GPU sharing this GPU's hive id (from this GPU to the other
	 * GPU). The reverse iolink (from the other GPU to this GPU) is
	 * added in kfd_parse_subtype_iolink.
	 */
	if (kdev->hive_id) {
		for (nid = 0; nid < proximity_domain; ++nid) {
			peer_dev = kfd_topology_device_by_proximity_domain(nid);
			/* Guard against a missing topology device as well
			 * as domains without a GPU.
			 */
			if (!peer_dev || !peer_dev->gpu)
				continue;
			if (peer_dev->gpu->hive_id != kdev->hive_id)
				continue;
			sub_type_hdr = (typeof(sub_type_hdr))(
				(char *)sub_type_hdr +
				sizeof(struct crat_subtype_iolink));
			ret = kfd_fill_gpu_xgmi_link_to_gpu(
				&avail_size, kdev, peer_dev->gpu,
				(struct crat_subtype_iolink *)sub_type_hdr,
				proximity_domain, nid);
			if (ret < 0)
				return ret;
			crat_table->length += sub_type_hdr->length;
			crat_table->total_entries++;
		}
	}
	*size = crat_table->length;
	pr_info("Virtual CRAT table created for GPU\n");

	return ret;
}

/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
 *	creates a Virtual CRAT (VCRAT) image
 *
 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
 *
 *	@crat_image: VCRAT image created because ACPI does not have a
 *		     CRAT for this device
 *	@size: [OUT] size of virtual crat_image
 *	@flags:	COMPUTE_UNIT_CPU - Create VCRAT for CPU device
 *		COMPUTE_UNIT_GPU - Create VCRAT for GPU
 *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
 *			-- this option is not currently implemented.
 *			The assumption is that all AMD APUs will have CRAT
 *	@kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
 *
 *	Return 0 if successful; otherwise return a negative value
 */
int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
				  int flags, struct kfd_dev *kdev,
				  uint32_t proximity_domain)
{
	void *pcrat_image = NULL;
	int ret = 0;

	if (!crat_image)
		return -EINVAL;

	*crat_image = NULL;

	/* Allocate VCRAT_SIZE_FOR_CPU for a CPU virtual CRAT image and
	 * VCRAT_SIZE_FOR_GPU for a GPU virtual CRAT image. This should
	 * cover all current conditions; the fill functions check that
	 * they do not write beyond the allocated size.
	 */
	switch (flags) {
	case COMPUTE_UNIT_CPU:
		pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL);
		if (!pcrat_image)
			return -ENOMEM;
		*size = VCRAT_SIZE_FOR_CPU;
		ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
		break;
	case COMPUTE_UNIT_GPU:
		if (!kdev)
			return -EINVAL;
		pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
		if (!pcrat_image)
			return -ENOMEM;
		*size = VCRAT_SIZE_FOR_GPU;
		ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
						 proximity_domain);
		break;
	case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
		/* TODO: */
		ret = -EINVAL;
		pr_err("VCRAT not implemented for APU\n");
		break;
	default:
		ret = -EINVAL;
	}

	if (!ret)
		*crat_image = pcrat_image;
	else
		kfree(pcrat_image);

	return ret;
}


/* kfd_destroy_crat_image
 *
 *	@crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
 *
 */
void kfd_destroy_crat_image(void *crat_image)
{
	kfree(crat_image);
}
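/*
 * Usage sketch (illustrative only; the caller below is hypothetical and
 * not part of this file): a consumer is expected to pair
 * kfd_create_crat_image_virtual() with kfd_destroy_crat_image().
 */
#if 0
static int example_build_gpu_vcrat(struct kfd_dev *kdev,
				   uint32_t proximity_domain)
{
	void *crat_image = NULL;
	size_t image_size = 0;
	int ret;

	/* Allocates and fills a GPU VCRAT; image_size reports the
	 * actual table length on success.
	 */
	ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
					    COMPUTE_UNIT_GPU, kdev,
					    proximity_domain);
	if (ret)
		return ret;

	/* ... consume image_size bytes of crat_image here ... */

	/* Free the image allocated by kfd_create_crat_image_virtual(). */
	kfd_destroy_crat_image(crat_image);
	return 0;
}
#endif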