
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2015-2022 Advanced Micro Devices, Inc.
 */

/*
 * @total_cu_count - Total CUs present in the GPU including ones
 */
	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
	dev->node_props.cpu_core_id_base = cu->processor_id_low;
	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;

	pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
		 cu->processor_id_low);

	dev->node_props.simd_id_base = cu->processor_id_low;
	dev->node_props.simd_count = cu->num_simd_cores;
	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
	dev->node_props.max_waves_per_simd = cu->max_waves_simd;
	dev->node_props.wave_front_size = cu->wave_front_size;
	dev->node_props.array_count = cu->array_count;
	dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
	dev->node_props.simd_per_cu = cu->num_simd_per_cu;
	dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
	if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
		dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
	pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);

/* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
 * topology device
 */

	pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
		 cu->proximity_domain, cu->hsa_capability);
	if (cu->proximity_domain == dev->proximity_domain) {
		if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
			kfd_populated_cu_info_cpu(dev, cu);
		if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
			kfd_populated_cu_info_gpu(dev, cu);
	}

	list_for_each_entry(props, &dev->mem_props, list) {
		if (props->heap_type == heap_type
				&& props->flags == flags
				&& props->width == width)
			return props;
	}

/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
 * topology device
 */
static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
				 struct list_head *device_list)
{
	pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
		 mem->proximity_domain);

	if (mem->proximity_domain == dev->proximity_domain) {
		/* GPU node: pick the heap from the CRAT visibility_type;
		 * CPU nodes always report system memory.
		 */
		if (dev->node_props.cpu_cores_count == 0) {
			if (mem->visibility_type == 0)
				heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE;
			else
				heap_type = mem->visibility_type;
		} else
			heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;

		if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
			flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
		if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
			flags |= HSA_MEM_FLAGS_NON_VOLATILE;

		size_in_bytes =
			((uint64_t)mem->length_high << 32) +
			mem->length_low;
		width = mem->width;

		/* Multiple banks of the same type are merged into one */
		props = find_subtype_mem(dev, heap_type, flags, width);
		if (props) {
			props->size_in_bytes += size_in_bytes;
			break;
		}

		props = kfd_alloc_struct(props);
		if (!props)
			return -ENOMEM;

		props->heap_type = heap_type;
		props->flags = flags;
		props->size_in_bytes = size_in_bytes;
		props->width = width;

		dev->node_props.mem_banks_count++;
		list_add_tail(&props->list, &dev->mem_props);
	}
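
/*
 * Worked example (illustration, not from the upstream file): CRAT carries
 * bank sizes as two 32-bit halves, so a 12 GiB bank arrives as
 * length_high = 0x3, length_low = 0x0 and is reassembled above as
 * ((uint64_t)0x3 << 32) + 0x0 = 12884901888 bytes. Firmware may describe one
 * physical DDR region as several such banks; find_subtype_mem() matches any
 * bank whose heap_type/flags/width triple is already present, which is why
 * the found case only bumps size_in_bytes and never allocates a new entry.
 */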

/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
 * topology device
 */

	id = cache->processor_id_low;

	total_num_of_cu = (dev->node_props.array_count *
			   dev->node_props.cu_per_simd_array);

	if ((id >= dev->node_props.cpu_core_id_base &&
	     id <= dev->node_props.cpu_core_id_base +
		   dev->node_props.cpu_cores_count) ||
	    (id >= dev->node_props.simd_id_base &&
	     id < dev->node_props.simd_id_base +
		  total_num_of_cu)) {
		props = kfd_alloc_struct(props);
		if (!props)
			return -ENOMEM;

		props->processor_id_low = id;
		props->cache_level = cache->cache_level;
		props->cache_size = cache->cache_size;
		props->cacheline_size = cache->cache_line_size;
		props->cachelines_per_tag = cache->lines_per_tag;
		props->cache_assoc = cache->associativity;
		props->cache_latency = cache->cache_latency;

		memcpy(props->sibling_map, cache->sibling_map,
		       CRAT_SIBLINGMAP_SIZE);

		/* set the sibling_map_size as 32 for CRAT from ACPI */
		props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;

		if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
			props->cache_type |= HSA_CACHE_TYPE_DATA;
		if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
			props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
		if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
			props->cache_type |= HSA_CACHE_TYPE_CPU;
		if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
			props->cache_type |= HSA_CACHE_TYPE_HSACU;

		dev->node_props.caches_count++;
		list_add_tail(&props->list, &dev->cache_props);
	}

/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
 * topology device
 */

	id_from = iolink->proximity_domain_from;
	id_to = iolink->proximity_domain_to;

	if (id_from == dev->proximity_domain) {
		props = kfd_alloc_struct(props);
		if (!props)
			return -ENOMEM;

		props->node_from = id_from;
		props->node_to = id_to;
		props->ver_maj = iolink->version_major;
		props->ver_min = iolink->version_minor;
		props->iolink_type = iolink->io_interface_type;

		if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
			props->weight = 20;
		else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
			props->weight = iolink->weight_xgmi;
		else
			props->weight = node_distance(id_from, id_to);

		props->min_latency = iolink->minimum_latency;
		props->max_latency = iolink->maximum_latency;
		props->min_bandwidth = iolink->minimum_bandwidth_mbs;
		props->max_bandwidth = iolink->maximum_bandwidth_mbs;
		props->rec_transfer_size =
				iolink->recommended_transfer_size;

		dev->node_props.io_links_count++;
		list_add_tail(&props->list, &dev->io_link_props);
	}

	/* CPU topology is created before GPUs are detected, so CPU->GPU
	 * links are not built at that time. If a PCIe type is discovered, it
	 * means a GPU is detected and we are adding GPU->CPU to the topology.
	 * At this time, also add the corresponding CPU->GPU link if the GPU
	 * has a large BAR.
	 */
	if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
		to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
		if (!to_dev)
			return -ENODEV;
		/* same everything but the other direction */
		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
		if (!props2)
			return -ENOMEM;

		props2->node_from = id_to;
		props2->node_to = id_from;
		props2->kobj = NULL;
		to_dev->node_props.io_links_count++;
		list_add_tail(&props2->list, &to_dev->io_link_props);
	}
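
/*
 * Topology sketch (illustration only): for a large-BAR GPU in proximity
 * domain 1 attached to the CPU in domain 0, the CRAT itself only holds the
 * GPU->CPU entry; the duplication above synthesizes the mirror image:
 *
 *	props:  node_from = 1, node_to = 0	(from the CRAT entry)
 *	props2: node_from = 0, node_to = 1	(generated, kobj reset to NULL)
 *
 * so both devices end up with a link in their io_link_props list.
 */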

/* kfd_parse_subtype - parse subtypes and attach it to correct topology device
 * present in the crat_image
 *
 * @sub_type_hdr - subtype section of crat_image
 * @device_list - list of topology devices present in this crat_image
 */

	struct crat_subtype_memory *mem;

	switch (sub_type_hdr->type) {
	case CRAT_SUBTYPE_MEMORY_AFFINITY:
		mem = (struct crat_subtype_memory *)sub_type_hdr;
		ret = kfd_parse_subtype_mem(mem, device_list);
		break;
	default:
		pr_warn("Unknown subtype %d in CRAT\n",
			sub_type_hdr->type);
	}
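
/*
 * Illustrative sketch (not upstream code) of the walk that feeds
 * kfd_parse_subtype(); it mirrors the loop in kfd_parse_crat_table() below.
 * Subtypes are variable-length records, so the cursor advances by each
 * header's own length field rather than by a fixed stride:
 *
 *	sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
 *	while ((char *)sub_type_hdr + sizeof(*sub_type_hdr) <
 *	       (char *)crat_image + image_len) {
 *		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED)
 *			ret = kfd_parse_subtype(sub_type_hdr, device_list);
 *		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
 *				sub_type_hdr->length);
 *	}
 */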

/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
 * create a kfd_topology_device and add it to device_list.
 *
 * @crat_image - input image containing CRAT
 * @device_list - [OUT] list of kfd_topology_device generated after
 *		  parsing crat_image
 * @proximity_domain - Proximity domain of the first device in the table
 *
 * Return - 0 if successful else -ve value
 */

	if (!crat_image)
		return -EINVAL;
	if (!list_empty(device_list))
		return -EINVAL;

	num_nodes = crat_table->num_domains;
	image_len = crat_table->length;

	top_dev = kfd_create_topology_device(device_list);
	if (!top_dev) {
		ret = -ENOMEM;
		goto err;
	}
	top_dev->proximity_domain = proximity_domain++;

	memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
	memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
	       CRAT_OEMTABLEID_LENGTH);
	top_dev->oem_revision = crat_table->oem_revision;

	if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
		ret = kfd_parse_subtype(sub_type_hdr, device_list);
		if (ret)
			break;
	}
	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

	struct amdgpu_device *adev = kdev->adev;
	int i = 0;

	/* TCP L1 cache, per CU */
	if (adev->gfx.config.gc_tcp_l1_size) {
		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
		i++;
	}

	/* Scalar L1 instruction cache, per SQC */
	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
		pcache_info[i].cache_size =
			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
		pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
		i++;
	}

	/* Scalar L1 data cache, per SQC */
	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
		pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
		i++;
	}

	/* GL1 data cache, per SA */
	if (adev->gfx.config.gc_gl1c_per_sa &&
	    adev->gfx.config.gc_gl1c_size_per_instance) {
		pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
			adev->gfx.config.gc_gl1c_size_per_instance;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}

	/* L2 (GL2C) cache, per GPU */
	if (adev->gfx.config.gc_gl2c_per_gpu) {
		pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
		i++;
	}

	/* L3 (MALL) cache, per GPU */
	if (adev->gmc.mall_size) {
		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
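
/*
 * Unit note (illustration, assuming the usual amdgpu conventions): the gc_*
 * cache sizes from the IP discovery table are already in KiB, while
 * adev->gmc.mall_size is in bytes, hence the / 1024 above. E.g. a 32 MiB
 * MALL reports mall_size = 33554432 and lands in the table as
 * cache_size = 32768 KiB, shared by every CU in the shader array.
 */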

	struct amdgpu_device *adev = kdev->adev;
	int i = 0;

	/* TCP L1 cache, per CU */
	if (adev->gfx.config.gc_tcp_size_per_cu) {
		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
		i++;
	}

	/* Scalar L1 instruction cache, per SQC */
	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
		pcache_info[i].cache_size =
			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
		i++;
	}

	/* Scalar L1 data cache, per SQC */
	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
		i++;
	}

	/* L2 (TCC) cache, per GPU */
	if (adev->gfx.config.gc_tcc_size) {
		pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}

	/* L3 (MALL) cache, per GPU */
	if (adev->gmc.mall_size) {
		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
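
/*
 * Note (hedged): this _v2 variant reads the same topology from the GC
 * 9.4.x-era config fields. gc_tcp_size_per_cu is already per-CU, so no
 * per-WGP division is needed, SQC caches are shared by gc_num_cu_per_sqc CUs
 * rather than a fixed two per WGP, and the L2 size comes from gc_tcc_size
 * instead of gc_gl2c_per_gpu. Both variants fall back to adev->gmc.mall_size
 * for the last-level entry.
 */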

	switch (kdev->adev->asic_type) {

			num_of_cache_types =
				kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
									   *pcache_info);

			num_of_cache_types =
				kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
									*pcache_info);

/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
 *
 * Return 0 if successful else return -ve value
 */

	*avail_size -= sizeof(struct crat_subtype_computeunit);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in CU data */
	sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
	sub_type_hdr->proximity_domain = proximity_domain;
	sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
	if (sub_type_hdr->processor_id_low == -1)
		return -EINVAL;

	sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);

/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
 *
 * Return 0 if successful else return -ve value
 */

	*avail_size -= sizeof(struct crat_subtype_memory);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in Memory Subunit data */
	pgdat = NODE_DATA(numa_node_id);
	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
	mem_in_bytes <<= PAGE_SHIFT;

	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
	sub_type_hdr->proximity_domain = proximity_domain;
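
/*
 * Worked example (illustration): zone_managed_pages() counts pages, so with
 * 4 KiB pages a node with 4194304 managed pages yields
 * mem_in_bytes = 4194304 << PAGE_SHIFT = 0x400000000 (16 GiB), which the
 * fixed-width CRAT fields carry as length_low = 0x0, length_high = 0x4.
 */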

	if (c->x86_vendor == X86_VENDOR_AMD)
		link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
	else
		link_type = CRAT_IOLINK_TYPE_QPI_1_1;

	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in IO link data */
	sub_type_hdr->proximity_domain_from = numa_node_id;
	sub_type_hdr->proximity_domain_to = nid;
	sub_type_hdr->io_interface_type = link_type;

/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
 */

	if (!pcrat_image)
		return -EINVAL;

	/* Fill in CRAT Header */
	avail_size -= sizeof(struct crat_header);
	if (avail_size < 0)
		return -ENOMEM;

	memcpy(&crat_table->signature, CRAT_SIGNATURE,
	       sizeof(crat_table->signature));
	crat_table->length = sizeof(struct crat_header);

	crat_table->oem_revision = acpi_table->revision;
	memcpy(crat_table->oem_id, acpi_table->oem_id,
	       CRAT_OEMID_LENGTH);
	memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
	       CRAT_OEMTABLEID_LENGTH);

	crat_table->total_entries = 0;
	crat_table->num_domains = 0;

	for_each_online_node(numa_node_id) {
		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
			continue;

		/* Fill in Subtype: Compute Unit */
		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
					  crat_table->num_domains,
					  (struct crat_subtype_computeunit *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);

		/* Fill in Subtype: Memory */
		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
						crat_table->num_domains,
						(struct crat_subtype_memory *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);

		/* Fill in Subtype: IO Link */
		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
						   &entries,
						   (struct crat_subtype_iolink *)sub_type_hdr);
		if (ret < 0)
			return ret;

		if (entries) {
			crat_table->length += (sub_type_hdr->length * entries);
			crat_table->total_entries += entries;

			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
					sub_type_hdr->length * entries);
		}

		crat_table->num_domains++;
	}

	*size = crat_table->length;
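
/*
 * Size bookkeeping sketch (illustration): for N online NUMA nodes the CPU
 * VCRAT grows to roughly
 *
 *	sizeof(struct crat_header)
 *	+ N * (sizeof(struct crat_subtype_computeunit)
 *	       + sizeof(struct crat_subtype_memory)
 *	       + (N - 1) * sizeof(struct crat_subtype_iolink))
 *
 * since each node emits one CU and one memory subtype plus an IO link to
 * every other node. kfd_create_crat_image_virtual() near the end of this
 * file sizes its allocation with the same formula.
 */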

	*avail_size -= sizeof(struct crat_subtype_memory);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in Memory Subunit data */
	sub_type_hdr->proximity_domain = proximity_domain;

	pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
		 type, size);

	sub_type_hdr->length_low = lower_32_bits(size);
	sub_type_hdr->length_high = upper_32_bits(size);

	sub_type_hdr->width = local_mem_info->vram_width;
	sub_type_hdr->visibility_type = type;

	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
		     pci_dev_id(kdev->adev->pdev);

	table_end = (unsigned long)table_header + table_header->length;

	subtable_len = sub_header->length;

	switch (sub_header->type) {
	case ACPI_SRAT_TYPE_CPU_AFFINITY:
		cpu = (struct acpi_srat_cpu_affinity *)sub_header;
		pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
		      cpu->proximity_domain_lo;
		if (pxm > max_pxm)
			max_pxm = pxm;
		break;
	case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
		gpu = (struct acpi_srat_generic_affinity *)sub_header;
		bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
		      *((u16 *)(&gpu->device_handle[2]));
		if (bdf == pci_id) {
			found = true;
			numa_node = pxm_to_node(gpu->proximity_domain);
		}
		break;
	}

	subtable_len = sub_header->length;

	/* Workaround bad cpu-gpu binding case */
	if (found && (numa_node < 0 ||
		      numa_node > pxm_to_node(max_pxm)))
		numa_node = 0;

	if (numa_node != NUMA_NO_NODE)
		set_dev_node(&kdev->adev->pdev->dev, numa_node);
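
/*
 * Encoding example (illustration, per the ACPI SRAT layout): pci_id packs
 * the device location as domain << 16 | bus << 8 | devfn, so a GPU at
 * 0000:03:00.0 gives pci_id = 0x00000300. The SRAT Generic Affinity entry
 * carries the same value split across device_handle[], segment in bytes 0-1
 * and bus/devfn in bytes 2-3, which is why two u16 loads are stitched back
 * together before the comparison.
 */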

/* kfd_fill_gpu_direct_io_link_to_cpu - Fill in direct io link from GPU
 * to its NUMA node
 *
 * @kdev - [IN] GPU device
 * @proximity_domain - proximity domain of the GPU node
 *
 * Return 0 if successful else return -ve value
 */

	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
	if (kfd_dev_is_large_bar(kdev))
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	/* Fill in IOLINK subtype.
	 * TODO: Fill-in other fields of iolink subtype
	 */
	if (kdev->adev->gmc.xgmi.connected_to_cpu ||
	    (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
	     kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
	     AMDGPU_PKG_TYPE_APU)) {
		bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
		int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
							KFD_CRAT_INTRA_SOCKET_WEIGHT;
		uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
							kdev->adev, NULL, true) : mem_bw;

		/* with host gpu xgmi link, host can access gpu memory whether
		 * or not pcie bar type is large, so always create bidirectional
		 * io link.
		 */
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
		sub_type_hdr->weight_xgmi = weight;
		sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
		sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
	} else {
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
		sub_type_hdr->minimum_bandwidth_mbs =
			amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
		sub_type_hdr->maximum_bandwidth_mbs =
			amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
	}

	sub_type_hdr->proximity_domain_from = proximity_domain;

#ifdef CONFIG_ACPI_NUMA
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
	    num_possible_nodes() > 1)
		kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
		sub_type_hdr->proximity_domain_to = 0;
	else
		sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
#else
	sub_type_hdr->proximity_domain_to = 0;
#endif
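
/*
 * Note (hedged): the PCIe branch reports an asymmetric range by querying
 * amdgpu_amdkfd_get_pcie_bandwidth_mbytes() twice (min=true, then
 * min=false), while the XGMI/APU branch uses a single bandwidth value for
 * both bounds because the fabric link is symmetric.
 */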

	bool use_ta_info = kdev->kfd->num_nodes == 1;

	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
			       CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	/* Fill in IOLINK subtype */
	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
	sub_type_hdr->proximity_domain_from = proximity_domain_from;
	sub_type_hdr->proximity_domain_to = proximity_domain_to;

	if (use_ta_info) {
		sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
			amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
		sub_type_hdr->maximum_bandwidth_mbs =
			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
								peer_kdev->adev, false);
		sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
	} else {
		bool is_single_hop = kdev->kfd == peer_kdev->kfd;
		int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
			(2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
		int mem_bw = 819200;

		sub_type_hdr->weight_xgmi = weight;
		sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
		sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
	}
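
/*
 * Weight arithmetic example (illustration, using the constants assumed in
 * the reconstruction above, KFD_CRAT_XGMI_WEIGHT = 15 and
 * KFD_CRAT_INTRA_SOCKET_WEIGHT = 13): on the TA-info path two hops give
 * weight_xgmi = 15 * 2 = 30, and on the multi-node path a cross-socket link
 * costs 2 * 13 + 15 = 41, so the topology code will always prefer the
 * direct, lower-weight link.
 */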

/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
 *
 *	@pcrat_image: Fill in VCRAT for GPU
 *	@size:	[IN] allocated size of crat_image.
 *		[OUT] actual size of data filled in crat_image
 */
	struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
	struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;

	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
		return -EINVAL;

	/* Fill the CRAT Header.
	 * Modify length and total_entries as subunits are added.
	 */
	avail_size -= sizeof(struct crat_header);

	memcpy(&crat_table->signature, CRAT_SIGNATURE,
	       sizeof(crat_table->signature));
	/* Change the length as we add more subtypes */
	crat_table->length = sizeof(struct crat_header);
	crat_table->num_domains = 1;
	crat_table->total_entries = 0;

	/* Fill in Subtype: Compute Unit
	 * First fill in the sub type header and then sub type data
	 */
	avail_size -= sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill CU subtype data */
	cu = (struct crat_subtype_computeunit *)sub_type_hdr;
	cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
	cu->proximity_domain = proximity_domain;

	cu->num_simd_per_cu = cu_info->simd_per_cu;
	cu->num_simd_cores = cu_info->simd_per_cu *
			(cu_info->number / kdev->kfd->num_nodes);
	cu->max_waves_simd = cu_info->max_waves_per_simd;

	cu->wave_front_size = cu_info->wave_front_size;
	cu->array_count = gfx_info->max_sh_per_se *
			gfx_info->max_shader_engines;
	total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh);
	cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
	cu->num_cu_per_array = gfx_info->max_cu_per_sh;
	cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu;
	cu->num_banks = gfx_info->max_shader_engines;
	cu->lds_size_in_kb = cu_info->lds_size;

	cu->hsa_capability = 0;

	crat_table->length += sub_type_hdr->length;
	crat_table->total_entries++;

	/* Fill in Subtype: Memory. On large-BAR systems all VRAM is
	 * reported as public; otherwise the total FB is a private heap.
	 */
	local_mem_info = kdev->local_mem_info;
	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

	if (kdev->adev->debug_largebar)
		local_mem_info.local_mem_size_private = 0;

	crat_table->length += sizeof(struct crat_subtype_memory);
	crat_table->total_entries++;

	/* Fill in Subtype: IO_LINKS
	 * Direct I/O link from GPU to CPU
	 */
	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

	crat_table->length += sub_type_hdr->length;
	crat_table->total_entries++;

	/* P2P links from GPU to other GPUs in the same hive */
	if (kdev->kfd->hive_id) {
		for (nid = 0; nid < proximity_domain; ++nid) {
			peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
			if (!peer_dev->gpu)
				continue;
			if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
				continue;
			if (!amdgpu_xgmi_get_is_sharing_enabled(kdev->adev, peer_dev->gpu->adev))
				continue;
			ret = kfd_fill_gpu_xgmi_link_to_gpu(
				&avail_size, kdev, peer_dev->gpu,
				(struct crat_subtype_iolink *)sub_type_hdr,
				proximity_domain, nid);
			if (ret < 0)
				return ret;
			crat_table->length += sub_type_hdr->length;
			crat_table->total_entries++;
		}
	}

	*size = crat_table->length;

/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
 *	creates a Virtual CRAT (VCRAT) image
 *
 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
 *
 * @crat_image: VCRAT image created because ACPI does not have a
 *		CRAT for this device
 * @size: [OUT] size of virtual crat_image
 * @flags:	COMPUTE_UNIT_CPU - Create VCRAT for CPU device
 *		COMPUTE_UNIT_GPU - Create VCRAT for GPU
 *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
 *		-- this option is not currently implemented.
 *
 * Return 0 if successful else return -ve value
 */

	if (!crat_image)
		return -EINVAL;

	switch (flags) {
	case COMPUTE_UNIT_CPU:
		dyn_size = sizeof(struct crat_header) +
			num_nodes * (sizeof(struct crat_subtype_computeunit) +
			sizeof(struct crat_subtype_memory) +
			(num_nodes - 1) * sizeof(struct crat_subtype_iolink));
		pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
		if (!pcrat_image)
			return -ENOMEM;
		break;
	case COMPUTE_UNIT_GPU:
		if (!kdev)
			return -EINVAL;
		pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
		if (!pcrat_image)
			return -ENOMEM;
		break;
	case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
		/* TODO: */
		ret = -EINVAL;
		break;
	default:
		ret = -EINVAL;
	}

/* kfd_destroy_crat_image
 *
 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
 */