Lines Matching +full:gpu +full:- +full:id
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2015-2022 Advanced Micro Devices, Inc.
32 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
33 * GPU processor IDs are expressed with Bit[31]=1.
34 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
39 /* Return the next available gpu_processor_id and increment it for next GPU
40 * @total_cu_count - Total CUs present in the GPU including ones
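Taken together, these two comments describe a trivial bump allocator: a file-scope counter seeded at 0x80001000 is handed out and then advanced by the GPU's total CU count, so every dGPU gets a contiguous processor-ID range with Bit[31] set. A minimal sketch, assuming the counter is named gpu_processor_id_low (the name and the absence of locking are assumptions, not confirmed by the matched lines):

    /* Sketch: bump allocator for dGPU processor IDs (variable name assumed) */
    static uint32_t gpu_processor_id_low = 0x80001000;

    static inline unsigned int get_and_inc_gpu_processor_id(
    				unsigned int total_cu_count)
    {
    	int current_id = gpu_processor_id_low;

    	/* Reserve one ID per CU so the next GPU starts past this range */
    	gpu_processor_id_low += total_cu_count;
    	return current_id;
    }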
171 /* L2 Data Cache per GPU (Total Tex Cache) */
214 /* L2 Data Cache per GPU (Total Tex Cache) */
257 /* L2 Data Cache per GPU (Total Tex Cache) */
300 /* L2 Data Cache per GPU (Total Tex Cache) */
343 /* L2 Data Cache per GPU (Total Tex Cache) */
386 /* L2 Data Cache per GPU (Total Tex Cache) */
439 /* L2 Data Cache per GPU (Total Tex Cache) */
492 /* L2 Data Cache per GPU (Total Tex Cache) */
545 /* L2 Data Cache per GPU (Total Tex Cache) */
598 /* L2 Data Cache per GPU (Total Tex Cache) */
608 /* L3 Data Cache per GPU */
661 /* L2 Data Cache per GPU (Total Tex Cache) */
671 /* L3 Data Cache per GPU */
724 /* L2 Data Cache per GPU (Total Tex Cache) */
734 /* L3 Data Cache per GPU */
787 /* L2 Data Cache per GPU (Total Tex Cache) */
797 /* L3 Data Cache per GPU */
850 /* L2 Data Cache per GPU (Total Tex Cache) */
903 /* L2 Data Cache per GPU (Total Tex Cache) */
956 /* L2 Data Cache per GPU (Total Tex Cache) */
1009 /* L2 Data Cache per GPU (Total Tex Cache) */
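The many "L2 Data Cache per GPU (Total Tex Cache)" hits above all come from the per-ASIC static cache tables in this file; each ASIC describes its caches as an array of small descriptors. A rough sketch of one such entry, with field names taken from the pcache_info accesses shown further down and the numeric values purely illustrative:

    struct kfd_gpu_cache_info {
    	uint32_t cache_size;		/* KiB */
    	uint32_t cache_level;		/* 1, 2 or 3 */
    	uint32_t cache_line_size;	/* bytes */
    	uint32_t flags;			/* CRAT_CACHE_FLAGS_* */
    	uint32_t num_cu_shared;		/* CUs sharing one cache instance */
    };

    static struct kfd_gpu_cache_info example_cache_info[] = {
    	{
    		/* L2 Data Cache per GPU (Total Tex Cache) */
    		.cache_size = 4096,	/* illustrative value */
    		.cache_level = 2,
    		.cache_line_size = 64,
    		.flags = (CRAT_CACHE_FLAGS_ENABLED |
    			  CRAT_CACHE_FLAGS_DATA_CACHE |
    			  CRAT_CACHE_FLAGS_SIMD_CACHE),
    		.num_cu_shared = 10,	/* e.g. all CUs in a shader array */
    	},
    };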
1023 dev->node_props.cpu_cores_count = cu->num_cpu_cores; in kfd_populated_cu_info_cpu()
1024 dev->node_props.cpu_core_id_base = cu->processor_id_low; in kfd_populated_cu_info_cpu()
1025 if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) in kfd_populated_cu_info_cpu()
1026 dev->node_props.capability |= HSA_CAP_ATS_PRESENT; in kfd_populated_cu_info_cpu()
1028 pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, in kfd_populated_cu_info_cpu()
1029 cu->processor_id_low); in kfd_populated_cu_info_cpu()
1035 dev->node_props.simd_id_base = cu->processor_id_low; in kfd_populated_cu_info_gpu()
1036 dev->node_props.simd_count = cu->num_simd_cores; in kfd_populated_cu_info_gpu()
1037 dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; in kfd_populated_cu_info_gpu()
1038 dev->node_props.max_waves_per_simd = cu->max_waves_simd; in kfd_populated_cu_info_gpu()
1039 dev->node_props.wave_front_size = cu->wave_front_size; in kfd_populated_cu_info_gpu()
1040 dev->node_props.array_count = cu->array_count; in kfd_populated_cu_info_gpu()
1041 dev->node_props.cu_per_simd_array = cu->num_cu_per_array; in kfd_populated_cu_info_gpu()
1042 dev->node_props.simd_per_cu = cu->num_simd_per_cu; in kfd_populated_cu_info_gpu()
1043 dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; in kfd_populated_cu_info_gpu()
1044 if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) in kfd_populated_cu_info_gpu()
1045 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; in kfd_populated_cu_info_gpu()
1046 pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); in kfd_populated_cu_info_gpu()
1049 /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
1058 cu->proximity_domain, cu->hsa_capability); in kfd_parse_subtype_cu()
1060 if (cu->proximity_domain == dev->proximity_domain) { in kfd_parse_subtype_cu()
1061 if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) in kfd_parse_subtype_cu()
1064 if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) in kfd_parse_subtype_cu()
1079 list_for_each_entry(props, &dev->mem_props, list) { in find_subtype_mem()
1080 if (props->heap_type == heap_type in find_subtype_mem()
1081 && props->flags == flags in find_subtype_mem()
1082 && props->width == width) in find_subtype_mem()
1088 /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
1102 mem->proximity_domain); in kfd_parse_subtype_mem()
1104 if (mem->proximity_domain == dev->proximity_domain) { in kfd_parse_subtype_mem()
1105 /* We're on GPU node */ in kfd_parse_subtype_mem()
1106 if (dev->node_props.cpu_cores_count == 0) { in kfd_parse_subtype_mem()
1108 if (mem->visibility_type == 0) in kfd_parse_subtype_mem()
1113 heap_type = mem->visibility_type; in kfd_parse_subtype_mem()
1117 if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) in kfd_parse_subtype_mem()
1119 if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) in kfd_parse_subtype_mem()
1123 ((uint64_t)mem->length_high << 32) + in kfd_parse_subtype_mem()
1124 mem->length_low; in kfd_parse_subtype_mem()
1125 width = mem->width; in kfd_parse_subtype_mem()
1134 props->size_in_bytes += size_in_bytes; in kfd_parse_subtype_mem()
1140 return -ENOMEM; in kfd_parse_subtype_mem()
1142 props->heap_type = heap_type; in kfd_parse_subtype_mem()
1143 props->flags = flags; in kfd_parse_subtype_mem()
1144 props->size_in_bytes = size_in_bytes; in kfd_parse_subtype_mem()
1145 props->width = width; in kfd_parse_subtype_mem()
1147 dev->node_props.mem_banks_count++; in kfd_parse_subtype_mem()
1148 list_add_tail(&props->list, &dev->mem_props); in kfd_parse_subtype_mem()
1157 /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
1165 uint32_t id; in kfd_parse_subtype_cache() local
1168 id = cache->processor_id_low; in kfd_parse_subtype_cache()
1170 pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); in kfd_parse_subtype_cache()
1172 total_num_of_cu = (dev->node_props.array_count * in kfd_parse_subtype_cache()
1173 dev->node_props.cu_per_simd_array); in kfd_parse_subtype_cache()
1176 * information as it is associated with a CPU core or GPU in kfd_parse_subtype_cache()
1177 * Compute Unit. So map the cache using CPU core Id or SIMD in kfd_parse_subtype_cache()
1178 * (GPU) ID. in kfd_parse_subtype_cache()
1183 if ((id >= dev->node_props.cpu_core_id_base && in kfd_parse_subtype_cache()
1184 id <= dev->node_props.cpu_core_id_base + in kfd_parse_subtype_cache()
1185 dev->node_props.cpu_cores_count) || in kfd_parse_subtype_cache()
1186 (id >= dev->node_props.simd_id_base && in kfd_parse_subtype_cache()
1187 id < dev->node_props.simd_id_base + in kfd_parse_subtype_cache()
1191 return -ENOMEM; in kfd_parse_subtype_cache()
1193 props->processor_id_low = id; in kfd_parse_subtype_cache()
1194 props->cache_level = cache->cache_level; in kfd_parse_subtype_cache()
1195 props->cache_size = cache->cache_size; in kfd_parse_subtype_cache()
1196 props->cacheline_size = cache->cache_line_size; in kfd_parse_subtype_cache()
1197 props->cachelines_per_tag = cache->lines_per_tag; in kfd_parse_subtype_cache()
1198 props->cache_assoc = cache->associativity; in kfd_parse_subtype_cache()
1199 props->cache_latency = cache->cache_latency; in kfd_parse_subtype_cache()
1201 memcpy(props->sibling_map, cache->sibling_map, in kfd_parse_subtype_cache()
1205 props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; in kfd_parse_subtype_cache()
1207 if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) in kfd_parse_subtype_cache()
1208 props->cache_type |= HSA_CACHE_TYPE_DATA; in kfd_parse_subtype_cache()
1209 if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) in kfd_parse_subtype_cache()
1210 props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in kfd_parse_subtype_cache()
1211 if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) in kfd_parse_subtype_cache()
1212 props->cache_type |= HSA_CACHE_TYPE_CPU; in kfd_parse_subtype_cache()
1213 if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) in kfd_parse_subtype_cache()
1214 props->cache_type |= HSA_CACHE_TYPE_HSACU; in kfd_parse_subtype_cache()
1216 dev->node_props.caches_count++; in kfd_parse_subtype_cache()
1217 list_add_tail(&props->list, &dev->cache_props); in kfd_parse_subtype_cache()
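As the comment at 1176-1178 says, a CRAT cache entry carries no proximity domain of its own, so kfd_parse_subtype_cache() attaches it to the topology device whose CPU-core or SIMD (GPU) ID range contains the cache's processor_id_low (the checks at 1183-1187). The same test, pulled out into a standalone helper purely for illustration (this helper does not exist in the driver):

    /* Hypothetical helper: does cache processor id 'id' belong to 'dev'? */
    static bool kfd_cache_id_belongs_to_dev(uint32_t id,
    					struct kfd_topology_device *dev)
    {
    	uint32_t total_num_of_cu = dev->node_props.array_count *
    				   dev->node_props.cu_per_simd_array;

    	/* CPU caches fall inside the node's CPU core ID range ... */
    	if (id >= dev->node_props.cpu_core_id_base &&
    	    id <= dev->node_props.cpu_core_id_base +
    		  dev->node_props.cpu_cores_count)
    		return true;

    	/* ... GPU caches inside the SIMD ID range covering its CUs */
    	return id >= dev->node_props.simd_id_base &&
    	       id < dev->node_props.simd_id_base + total_num_of_cu;
    }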
1226 /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
1237 id_from = iolink->proximity_domain_from; in kfd_parse_subtype_iolink()
1238 id_to = iolink->proximity_domain_to; in kfd_parse_subtype_iolink()
1243 if (id_from == dev->proximity_domain) { in kfd_parse_subtype_iolink()
1246 return -ENOMEM; in kfd_parse_subtype_iolink()
1248 props->node_from = id_from; in kfd_parse_subtype_iolink()
1249 props->node_to = id_to; in kfd_parse_subtype_iolink()
1250 props->ver_maj = iolink->version_major; in kfd_parse_subtype_iolink()
1251 props->ver_min = iolink->version_minor; in kfd_parse_subtype_iolink()
1252 props->iolink_type = iolink->io_interface_type; in kfd_parse_subtype_iolink()
1254 if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) in kfd_parse_subtype_iolink()
1255 props->weight = 20; in kfd_parse_subtype_iolink()
1256 else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) in kfd_parse_subtype_iolink()
1257 props->weight = iolink->weight_xgmi; in kfd_parse_subtype_iolink()
1259 props->weight = node_distance(id_from, id_to); in kfd_parse_subtype_iolink()
1261 props->min_latency = iolink->minimum_latency; in kfd_parse_subtype_iolink()
1262 props->max_latency = iolink->maximum_latency; in kfd_parse_subtype_iolink()
1263 props->min_bandwidth = iolink->minimum_bandwidth_mbs; in kfd_parse_subtype_iolink()
1264 props->max_bandwidth = iolink->maximum_bandwidth_mbs; in kfd_parse_subtype_iolink()
1265 props->rec_transfer_size = in kfd_parse_subtype_iolink()
1266 iolink->recommended_transfer_size; in kfd_parse_subtype_iolink()
1268 dev->node_props.io_links_count++; in kfd_parse_subtype_iolink()
1269 list_add_tail(&props->list, &dev->io_link_props); in kfd_parse_subtype_iolink()
1274 /* CPU topology is created before GPUs are detected, so CPU->GPU in kfd_parse_subtype_iolink()
1276 * means a GPU is detected and we are adding GPU->CPU to the topology. in kfd_parse_subtype_iolink()
1277 * At this time, also add the corresponding CPU->GPU link if GPU in kfd_parse_subtype_iolink()
1282 if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { in kfd_parse_subtype_iolink()
1285 return -ENODEV; in kfd_parse_subtype_iolink()
1289 return -ENOMEM; in kfd_parse_subtype_iolink()
1291 props2->node_from = id_to; in kfd_parse_subtype_iolink()
1292 props2->node_to = id_from; in kfd_parse_subtype_iolink()
1293 props2->kobj = NULL; in kfd_parse_subtype_iolink()
1294 to_dev->node_props.io_links_count++; in kfd_parse_subtype_iolink()
1295 list_add_tail(&props2->list, &to_dev->io_link_props); in kfd_parse_subtype_iolink()
1301 /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
1303 * @sub_type_hdr - subtype section of crat_image
1304 * @device_list - list of topology devices present in this crat_image
1315 switch (sub_type_hdr->type) { in kfd_parse_subtype()
1346 sub_type_hdr->type); in kfd_parse_subtype()
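kfd_parse_subtype() itself is just a dispatcher: it switches on sub_type_hdr->type and forwards the entry to the matching parser above, warning on anything it does not recognize (the pr_warn whose argument shows up at 1346). A sketch of that shape; the exact set of handled subtypes is an assumption here:

    static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
    			     struct list_head *device_list)
    {
    	switch (sub_type_hdr->type) {
    	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
    		return kfd_parse_subtype_cu(
    			(struct crat_subtype_computeunit *)sub_type_hdr,
    			device_list);
    	case CRAT_SUBTYPE_MEMORY_AFFINITY:
    		return kfd_parse_subtype_mem(
    			(struct crat_subtype_memory *)sub_type_hdr,
    			device_list);
    	case CRAT_SUBTYPE_CACHE_AFFINITY:
    		return kfd_parse_subtype_cache(
    			(struct crat_subtype_cache *)sub_type_hdr,
    			device_list);
    	case CRAT_SUBTYPE_IOLINK_AFFINITY:
    		return kfd_parse_subtype_iolink(
    			(struct crat_subtype_iolink *)sub_type_hdr,
    			device_list);
    	default:
    		pr_warn("Unknown CRAT subtype %d\n", sub_type_hdr->type);
    		return 0;
    	}
    }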
1352 /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
1355 * @crat_image - input image containing CRAT
1356 * @device_list - [OUT] list of kfd_topology_device generated after
1358 * @proximity_domain - Proximity domain of the first device in the table
1360 * Return - 0 if successful else -ve value
1374 return -EINVAL; in kfd_parse_crat_table()
1378 return -EINVAL; in kfd_parse_crat_table()
1381 num_nodes = crat_table->num_domains; in kfd_parse_crat_table()
1382 image_len = crat_table->length; in kfd_parse_crat_table()
1390 top_dev->proximity_domain = proximity_domain++; in kfd_parse_crat_table()
1394 ret = -ENOMEM; in kfd_parse_crat_table()
1398 memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); in kfd_parse_crat_table()
1399 memcpy(top_dev->oem_table_id, crat_table->oem_table_id, in kfd_parse_crat_table()
1401 top_dev->oem_revision = crat_table->oem_revision; in kfd_parse_crat_table()
1406 if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { in kfd_parse_crat_table()
1413 sub_type_hdr->length); in kfd_parse_crat_table()
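The table parse is a length-driven walk: validate the crat_header, read num_domains and length, then step a crat_subtype_generic pointer forward by each entry's length, handing every enabled entry (flags & CRAT_SUBTYPE_FLAGS_ENABLED, line 1406) to kfd_parse_subtype(). A condensed sketch of that loop, with device-list creation and error handling left out:

    struct crat_header *crat_table = (struct crat_header *)crat_image;
    struct crat_subtype_generic *sub_type_hdr;
    int ret = 0;

    sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
    while ((char *)sub_type_hdr < (char *)crat_image + crat_table->length) {
    	if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
    		ret = kfd_parse_subtype(sub_type_hdr, device_list);
    		if (ret)
    			break;
    	}
    	sub_type_hdr = (struct crat_subtype_generic *)
    			((char *)sub_type_hdr + sub_type_hdr->length);
    }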
1427 struct amdgpu_device *adev = kdev->adev; in kfd_fill_gpu_cache_info_from_gfx_config()
1431 if (adev->gfx.config.gc_tcp_l1_size) { in kfd_fill_gpu_cache_info_from_gfx_config()
1432 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1437 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1438 pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1442 if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config()
1444 adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config()
1449 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1450 pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1454 if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config()
1455 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config()
1460 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1461 pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1465 if (adev->gfx.config.gc_gl1c_per_sa && in kfd_fill_gpu_cache_info_from_gfx_config()
1466 adev->gfx.config.gc_gl1c_size_per_instance) { in kfd_fill_gpu_cache_info_from_gfx_config()
1467 pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa * in kfd_fill_gpu_cache_info_from_gfx_config()
1468 adev->gfx.config.gc_gl1c_size_per_instance; in kfd_fill_gpu_cache_info_from_gfx_config()
1473 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1477 /* L2 Data Cache per GPU (Total Tex Cache) */ in kfd_fill_gpu_cache_info_from_gfx_config()
1478 if (adev->gfx.config.gc_gl2c_per_gpu) { in kfd_fill_gpu_cache_info_from_gfx_config()
1479 pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu; in kfd_fill_gpu_cache_info_from_gfx_config()
1484 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1485 pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1488 /* L3 Data Cache per GPU */ in kfd_fill_gpu_cache_info_from_gfx_config()
1489 if (adev->gmc.mall_size) { in kfd_fill_gpu_cache_info_from_gfx_config()
1490 pcache_info[i].cache_size = adev->gmc.mall_size / 1024; in kfd_fill_gpu_cache_info_from_gfx_config()
1495 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1505 struct amdgpu_device *adev = kdev->adev; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1509 if (adev->gfx.config.gc_tcp_size_per_cu) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1510 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1519 if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1521 adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1526 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1530 if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1531 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1536 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1539 /* L2 Data Cache per GPU (Total Tex Cache) */ in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1540 if (adev->gfx.config.gc_tcc_size) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1541 pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1546 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1549 /* L3 Data Cache per GPU */ in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1550 if (adev->gmc.mall_size) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1551 pcache_info[i].cache_size = adev->gmc.mall_size / 1024; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1556 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1566 switch (kdev->adev->asic_type) { in kfd_get_gpu_cache_info()
1625 kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, in kfd_get_gpu_cache_info()
1691 kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); in kfd_get_gpu_cache_info()
1705 * following amount is allocated for GPU Virtual CRAT. This is
1711 /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
1713 * @numa_node_id: CPU NUMA node id
1717 * Return 0 if successful else return -ve value
1725 *avail_size -= sizeof(struct crat_subtype_computeunit); in kfd_fill_cu_for_cpu()
1727 return -ENOMEM; in kfd_fill_cu_for_cpu()
1732 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; in kfd_fill_cu_for_cpu()
1733 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); in kfd_fill_cu_for_cpu()
1734 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_cu_for_cpu()
1739 sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; in kfd_fill_cu_for_cpu()
1740 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_cu_for_cpu()
1741 sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); in kfd_fill_cu_for_cpu()
1742 if (sub_type_hdr->processor_id_low == -1) in kfd_fill_cu_for_cpu()
1743 return -EINVAL; in kfd_fill_cu_for_cpu()
1745 sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); in kfd_fill_cu_for_cpu()
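All of the kfd_fill_* helpers in this part of the file share the same space-budget discipline before touching the buffer: subtract the subtype's size from *avail_size, return -ENOMEM if the fixed-size virtual CRAT image would overflow, then memset the entry and fill in its type, length and the ENABLED flag. The pattern in isolation (the helper name is made up; it is not a function in the driver):

    /* Illustrative only: the bookkeeping every kfd_fill_* helper repeats */
    static int kfd_reserve_subtype(int *avail_size,
    			       struct crat_subtype_generic *hdr,
    			       uint8_t type, uint8_t length)
    {
    	*avail_size -= length;
    	if (*avail_size < 0)
    		return -ENOMEM;

    	memset(hdr, 0, length);
    	hdr->type = type;
    	hdr->length = length;
    	hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
    	return 0;
    }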
1750 /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
1752 * @numa_node_id: CPU NUMA node id
1756 * Return 0 if successful else return -ve value
1766 *avail_size -= sizeof(struct crat_subtype_memory); in kfd_fill_mem_info_for_cpu()
1768 return -ENOMEM; in kfd_fill_mem_info_for_cpu()
1773 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; in kfd_fill_mem_info_for_cpu()
1774 sub_type_hdr->length = sizeof(struct crat_subtype_memory); in kfd_fill_mem_info_for_cpu()
1775 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_mem_info_for_cpu()
1785 mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]); in kfd_fill_mem_info_for_cpu()
1788 sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); in kfd_fill_mem_info_for_cpu()
1789 sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); in kfd_fill_mem_info_for_cpu()
1790 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_mem_info_for_cpu()
1804 if (c->x86_vendor == X86_VENDOR_AMD) in kfd_fill_iolink_info_for_cpu()
1816 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_iolink_info_for_cpu()
1818 return -ENOMEM; in kfd_fill_iolink_info_for_cpu()
1823 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_iolink_info_for_cpu()
1824 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_iolink_info_for_cpu()
1825 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_iolink_info_for_cpu()
1828 sub_type_hdr->proximity_domain_from = numa_node_id; in kfd_fill_iolink_info_for_cpu()
1829 sub_type_hdr->proximity_domain_to = nid; in kfd_fill_iolink_info_for_cpu()
1830 sub_type_hdr->io_interface_type = link_type; in kfd_fill_iolink_info_for_cpu()
1840 /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
1860 return -EINVAL; in kfd_create_vcrat_image_cpu()
1865 avail_size -= sizeof(struct crat_header); in kfd_create_vcrat_image_cpu()
1867 return -ENOMEM; in kfd_create_vcrat_image_cpu()
1870 memcpy(&crat_table->signature, CRAT_SIGNATURE, in kfd_create_vcrat_image_cpu()
1871 sizeof(crat_table->signature)); in kfd_create_vcrat_image_cpu()
1872 crat_table->length = sizeof(struct crat_header); in kfd_create_vcrat_image_cpu()
1878 crat_table->oem_revision = acpi_table->revision; in kfd_create_vcrat_image_cpu()
1879 memcpy(crat_table->oem_id, acpi_table->oem_id, in kfd_create_vcrat_image_cpu()
1881 memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, in kfd_create_vcrat_image_cpu()
1885 crat_table->total_entries = 0; in kfd_create_vcrat_image_cpu()
1886 crat_table->num_domains = 0; in kfd_create_vcrat_image_cpu()
1891 if (kfd_numa_node_to_apic_id(numa_node_id) == -1) in kfd_create_vcrat_image_cpu()
1896 crat_table->num_domains, in kfd_create_vcrat_image_cpu()
1900 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_cpu()
1901 crat_table->total_entries++; in kfd_create_vcrat_image_cpu()
1904 sub_type_hdr->length); in kfd_create_vcrat_image_cpu()
1908 crat_table->num_domains, in kfd_create_vcrat_image_cpu()
1912 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_cpu()
1913 crat_table->total_entries++; in kfd_create_vcrat_image_cpu()
1916 sub_type_hdr->length); in kfd_create_vcrat_image_cpu()
1927 crat_table->length += (sub_type_hdr->length * entries); in kfd_create_vcrat_image_cpu()
1928 crat_table->total_entries += entries; in kfd_create_vcrat_image_cpu()
1931 sub_type_hdr->length * entries); in kfd_create_vcrat_image_cpu()
1937 crat_table->num_domains++; in kfd_create_vcrat_image_cpu()
1948 *size = crat_table->length; in kfd_create_vcrat_image_cpu()
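Putting the pieces together, kfd_create_vcrat_image_cpu() walks every online NUMA node with a valid APIC id and, for each one, appends a compute-unit subtype, a memory subtype and one IO-link subtype per peer node, bumping crat_table->length, total_entries and num_domains and advancing sub_type_hdr by the length just written (the bookkeeping visible at 1900-1937). A compressed sketch of one iteration, with return-value checks omitted and helper signatures approximated from the doc comments above:

    for_each_online_node(numa_node_id) {
    	if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
    		continue;

    	/* Compute-unit subtype for this NUMA node */
    	kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
    			    crat_table->num_domains,
    			    (struct crat_subtype_computeunit *)sub_type_hdr);
    	crat_table->length += sub_type_hdr->length;
    	crat_table->total_entries++;
    	sub_type_hdr = (void *)sub_type_hdr + sub_type_hdr->length;

    	/* Memory subtype for this NUMA node */
    	kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
    				  crat_table->num_domains,
    				  (struct crat_subtype_memory *)sub_type_hdr);
    	crat_table->length += sub_type_hdr->length;
    	crat_table->total_entries++;
    	sub_type_hdr = (void *)sub_type_hdr + sub_type_hdr->length;

    	/* One IO link to every other NUMA node */
    	kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, &entries,
    				     (struct crat_subtype_iolink *)sub_type_hdr);
    	crat_table->length += sub_type_hdr->length * entries;
    	crat_table->total_entries += entries;
    	sub_type_hdr = (void *)sub_type_hdr + sub_type_hdr->length * entries;

    	crat_table->num_domains++;
    }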
1960 *avail_size -= sizeof(struct crat_subtype_memory); in kfd_fill_gpu_memory_affinity()
1962 return -ENOMEM; in kfd_fill_gpu_memory_affinity()
1965 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; in kfd_fill_gpu_memory_affinity()
1966 sub_type_hdr->length = sizeof(struct crat_subtype_memory); in kfd_fill_gpu_memory_affinity()
1967 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_gpu_memory_affinity()
1969 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_gpu_memory_affinity()
1971 pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n", in kfd_fill_gpu_memory_affinity()
1974 sub_type_hdr->length_low = lower_32_bits(size); in kfd_fill_gpu_memory_affinity()
1975 sub_type_hdr->length_high = upper_32_bits(size); in kfd_fill_gpu_memory_affinity()
1977 sub_type_hdr->width = local_mem_info->vram_width; in kfd_fill_gpu_memory_affinity()
1978 sub_type_hdr->visibility_type = type; in kfd_fill_gpu_memory_affinity()
1989 u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 | in kfd_find_numa_node_in_srat()
1990 pci_dev_id(kdev->adev->pdev); in kfd_find_numa_node_in_srat()
1994 struct acpi_srat_generic_affinity *gpu; in kfd_find_numa_node_in_srat() local
2010 table_end = (unsigned long)table_header + table_header->length; in kfd_find_numa_node_in_srat()
2016 subtable_len = sub_header->length; in kfd_find_numa_node_in_srat()
2028 switch (sub_header->type) { in kfd_find_numa_node_in_srat()
2031 pxm = *((u32 *)cpu->proximity_domain_hi) << 8 | in kfd_find_numa_node_in_srat()
2032 cpu->proximity_domain_lo; in kfd_find_numa_node_in_srat()
2037 gpu = (struct acpi_srat_generic_affinity *)sub_header; in kfd_find_numa_node_in_srat()
2038 bdf = *((u16 *)(&gpu->device_handle[0])) << 16 | in kfd_find_numa_node_in_srat()
2039 *((u16 *)(&gpu->device_handle[2])); in kfd_find_numa_node_in_srat()
2042 numa_node = pxm_to_node(gpu->proximity_domain); in kfd_find_numa_node_in_srat()
2054 subtable_len = sub_header->length; in kfd_find_numa_node_in_srat()
2059 /* Workaround bad cpu-gpu binding case */ in kfd_find_numa_node_in_srat()
2065 set_dev_node(&kdev->adev->pdev->dev, numa_node); in kfd_find_numa_node_in_srat()
2072 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
2075 * @kdev - [IN] GPU device
2077 * @proximity_domain - proximity domain of the GPU node
2079 * Return 0 if successful else return -ve value
2086 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_direct_io_link_to_cpu()
2088 return -ENOMEM; in kfd_fill_gpu_direct_io_link_to_cpu()
2093 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_gpu_direct_io_link_to_cpu()
2094 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_direct_io_link_to_cpu()
2095 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_gpu_direct_io_link_to_cpu()
2097 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; in kfd_fill_gpu_direct_io_link_to_cpu()
2100 * TODO: Fill-in other fields of iolink subtype in kfd_fill_gpu_direct_io_link_to_cpu()
2102 if (kdev->adev->gmc.xgmi.connected_to_cpu || in kfd_fill_gpu_direct_io_link_to_cpu()
2104 kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) == in kfd_fill_gpu_direct_io_link_to_cpu()
2110 kdev->adev, NULL, true) : mem_bw; in kfd_fill_gpu_direct_io_link_to_cpu()
2113 * with host gpu xgmi link, host can access gpu memory whether in kfd_fill_gpu_direct_io_link_to_cpu()
2117 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; in kfd_fill_gpu_direct_io_link_to_cpu()
2118 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; in kfd_fill_gpu_direct_io_link_to_cpu()
2119 sub_type_hdr->weight_xgmi = weight; in kfd_fill_gpu_direct_io_link_to_cpu()
2120 sub_type_hdr->minimum_bandwidth_mbs = bandwidth; in kfd_fill_gpu_direct_io_link_to_cpu()
2121 sub_type_hdr->maximum_bandwidth_mbs = bandwidth; in kfd_fill_gpu_direct_io_link_to_cpu()
2123 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; in kfd_fill_gpu_direct_io_link_to_cpu()
2124 sub_type_hdr->minimum_bandwidth_mbs = in kfd_fill_gpu_direct_io_link_to_cpu()
2125 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true); in kfd_fill_gpu_direct_io_link_to_cpu()
2126 sub_type_hdr->maximum_bandwidth_mbs = in kfd_fill_gpu_direct_io_link_to_cpu()
2127 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false); in kfd_fill_gpu_direct_io_link_to_cpu()
2130 sub_type_hdr->proximity_domain_from = proximity_domain; in kfd_fill_gpu_direct_io_link_to_cpu()
2133 if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE && in kfd_fill_gpu_direct_io_link_to_cpu()
2138 if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) in kfd_fill_gpu_direct_io_link_to_cpu()
2139 sub_type_hdr->proximity_domain_to = 0; in kfd_fill_gpu_direct_io_link_to_cpu()
2141 sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node; in kfd_fill_gpu_direct_io_link_to_cpu()
2143 sub_type_hdr->proximity_domain_to = 0; in kfd_fill_gpu_direct_io_link_to_cpu()
2155 bool use_ta_info = kdev->kfd->num_nodes == 1; in kfd_fill_gpu_xgmi_link_to_gpu()
2157 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_xgmi_link_to_gpu()
2159 return -ENOMEM; in kfd_fill_gpu_xgmi_link_to_gpu()
2163 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_gpu_xgmi_link_to_gpu()
2164 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_xgmi_link_to_gpu()
2165 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED | in kfd_fill_gpu_xgmi_link_to_gpu()
2168 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; in kfd_fill_gpu_xgmi_link_to_gpu()
2169 sub_type_hdr->proximity_domain_from = proximity_domain_from; in kfd_fill_gpu_xgmi_link_to_gpu()
2170 sub_type_hdr->proximity_domain_to = proximity_domain_to; in kfd_fill_gpu_xgmi_link_to_gpu()
2173 sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT * in kfd_fill_gpu_xgmi_link_to_gpu()
2174 amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); in kfd_fill_gpu_xgmi_link_to_gpu()
2175 sub_type_hdr->maximum_bandwidth_mbs = in kfd_fill_gpu_xgmi_link_to_gpu()
2176 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, in kfd_fill_gpu_xgmi_link_to_gpu()
2177 peer_kdev->adev, false); in kfd_fill_gpu_xgmi_link_to_gpu()
2178 sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? in kfd_fill_gpu_xgmi_link_to_gpu()
2179 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2181 bool is_single_hop = kdev->kfd == peer_kdev->kfd; in kfd_fill_gpu_xgmi_link_to_gpu()
2186 sub_type_hdr->weight_xgmi = weight; in kfd_fill_gpu_xgmi_link_to_gpu()
2187 sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2188 sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2194 /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
2196 * @pcrat_image: Fill in VCRAT for GPU
2205 struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; in kfd_create_vcrat_image_gpu()
2206 struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; in kfd_create_vcrat_image_gpu()
2217 return -EINVAL; in kfd_create_vcrat_image_gpu()
2222 avail_size -= sizeof(struct crat_header); in kfd_create_vcrat_image_gpu()
2225 memcpy(&crat_table->signature, CRAT_SIGNATURE, in kfd_create_vcrat_image_gpu()
2226 sizeof(crat_table->signature)); in kfd_create_vcrat_image_gpu()
2228 crat_table->length = sizeof(struct crat_header); in kfd_create_vcrat_image_gpu()
2229 crat_table->num_domains = 1; in kfd_create_vcrat_image_gpu()
2230 crat_table->total_entries = 0; in kfd_create_vcrat_image_gpu()
2235 avail_size -= sizeof(struct crat_subtype_computeunit); in kfd_create_vcrat_image_gpu()
2239 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; in kfd_create_vcrat_image_gpu()
2240 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); in kfd_create_vcrat_image_gpu()
2241 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_create_vcrat_image_gpu()
2245 cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; in kfd_create_vcrat_image_gpu()
2246 cu->proximity_domain = proximity_domain; in kfd_create_vcrat_image_gpu()
2248 cu->num_simd_per_cu = cu_info->simd_per_cu; in kfd_create_vcrat_image_gpu()
2249 cu->num_simd_cores = cu_info->simd_per_cu * in kfd_create_vcrat_image_gpu()
2250 (cu_info->number / kdev->kfd->num_nodes); in kfd_create_vcrat_image_gpu()
2251 cu->max_waves_simd = cu_info->max_waves_per_simd; in kfd_create_vcrat_image_gpu()
2253 cu->wave_front_size = cu_info->wave_front_size; in kfd_create_vcrat_image_gpu()
2254 cu->array_count = gfx_info->max_sh_per_se * in kfd_create_vcrat_image_gpu()
2255 gfx_info->max_shader_engines; in kfd_create_vcrat_image_gpu()
2256 total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh); in kfd_create_vcrat_image_gpu()
2257 cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); in kfd_create_vcrat_image_gpu()
2258 cu->num_cu_per_array = gfx_info->max_cu_per_sh; in kfd_create_vcrat_image_gpu()
2259 cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu; in kfd_create_vcrat_image_gpu()
2260 cu->num_banks = gfx_info->max_shader_engines; in kfd_create_vcrat_image_gpu()
2261 cu->lds_size_in_kb = cu_info->lds_size; in kfd_create_vcrat_image_gpu()
2263 cu->hsa_capability = 0; in kfd_create_vcrat_image_gpu()
2265 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2266 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2273 local_mem_info = kdev->local_mem_info; in kfd_create_vcrat_image_gpu()
2275 sub_type_hdr->length); in kfd_create_vcrat_image_gpu()
2277 if (kdev->adev->debug_largebar) in kfd_create_vcrat_image_gpu()
2298 crat_table->length += sizeof(struct crat_subtype_memory); in kfd_create_vcrat_image_gpu()
2299 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2302 * Only direct links are added here which is Link from GPU to in kfd_create_vcrat_image_gpu()
2306 sub_type_hdr->length); in kfd_create_vcrat_image_gpu()
2313 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2314 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2318 * Direct links from GPU to other GPUs through xGMI. in kfd_create_vcrat_image_gpu()
2321 * hive id (from this GPU to other GPU). The reversed iolink in kfd_create_vcrat_image_gpu()
2322 * (from other GPU to this GPU) will be added in kfd_create_vcrat_image_gpu()
2325 if (kdev->kfd->hive_id) { in kfd_create_vcrat_image_gpu()
2328 if (!peer_dev->gpu) in kfd_create_vcrat_image_gpu()
2330 if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id) in kfd_create_vcrat_image_gpu()
2336 &avail_size, kdev, peer_dev->gpu, in kfd_create_vcrat_image_gpu()
2341 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2342 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2345 *size = crat_table->length; in kfd_create_vcrat_image_gpu()
2346 pr_info("Virtual CRAT table created for GPU\n"); in kfd_create_vcrat_image_gpu()
2351 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2359 * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2360 * COMPUTE_UNIT_GPU - Create VCRAT for GPU
2361 * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2362 * -- this option is not currently implemented.
2366 * Return 0 if successful else return -ve value
2377 return -EINVAL; in kfd_create_crat_image_virtual()
2382 * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. in kfd_create_crat_image_virtual()
2392 (num_nodes - 1) * sizeof(struct crat_subtype_iolink)); in kfd_create_crat_image_virtual()
2395 return -ENOMEM; in kfd_create_crat_image_virtual()
2402 return -EINVAL; in kfd_create_crat_image_virtual()
2405 return -ENOMEM; in kfd_create_crat_image_virtual()
2412 ret = -EINVAL; in kfd_create_crat_image_virtual()
2416 ret = -EINVAL; in kfd_create_crat_image_virtual()
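In use, the virtual-CRAT path is create / parse / destroy: build the image for the device class at hand, feed it to kfd_parse_crat_table() to generate topology devices, then free it. A hedged caller-side sketch for the CPU case (argument order follows the doc comments above; treat it as approximate):

    void *crat_image = NULL;
    size_t image_size = 0;
    struct list_head temp_topology_device_list;
    uint32_t proximity_domain = 0;
    int ret;

    INIT_LIST_HEAD(&temp_topology_device_list);

    /* CPU-only image; pass COMPUTE_UNIT_GPU and a kfd node for a dGPU */
    ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
    				    COMPUTE_UNIT_CPU, NULL,
    				    proximity_domain);
    if (!ret) {
    	ret = kfd_parse_crat_table(crat_image,
    				   &temp_topology_device_list,
    				   proximity_domain);
    	kfd_destroy_crat_image(crat_image);
    }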
2430 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)