Lines Matching +full:gpu +full:- +full:id
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2015-2022 Advanced Micro Devices, Inc.
32 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
33 * GPU processor IDs are expressed with Bit[31]=1.
34 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
39 /* Return the next available gpu_processor_id and increment it for next GPU
40 * @total_cu_count - Total CUs present in the GPU including ones
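Taken together, these two comments describe a trivial bump allocator: a file-scope counter seeded at 0x80001000 is handed out and then advanced by the GPU's total CU count, so every dGPU gets a contiguous processor-ID range with Bit[31] set. A minimal sketch, assuming the counter is named gpu_processor_id_low (the name and the absence of locking are assumptions, not confirmed by the matched lines):

    /* Sketch: bump allocator for dGPU processor IDs (variable name assumed) */
    static uint32_t gpu_processor_id_low = 0x80001000;

    static inline unsigned int get_and_inc_gpu_processor_id(
    				unsigned int total_cu_count)
    {
    	int current_id = gpu_processor_id_low;

    	/* Reserve one ID per CU so the next GPU starts past this range */
    	gpu_processor_id_low += total_cu_count;
    	return current_id;
    }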
171 /* L2 Data Cache per GPU (Total Tex Cache) */
214 /* L2 Data Cache per GPU (Total Tex Cache) */
257 /* L2 Data Cache per GPU (Total Tex Cache) */
300 /* L2 Data Cache per GPU (Total Tex Cache) */
343 /* L2 Data Cache per GPU (Total Tex Cache) */
386 /* L2 Data Cache per GPU (Total Tex Cache) */
439 /* L2 Data Cache per GPU (Total Tex Cache) */
492 /* L2 Data Cache per GPU (Total Tex Cache) */
545 /* L2 Data Cache per GPU (Total Tex Cache) */
598 /* L2 Data Cache per GPU (Total Tex Cache) */
608 /* L3 Data Cache per GPU */
661 /* L2 Data Cache per GPU (Total Tex Cache) */
671 /* L3 Data Cache per GPU */
724 /* L2 Data Cache per GPU (Total Tex Cache) */
734 /* L3 Data Cache per GPU */
787 /* L2 Data Cache per GPU (Total Tex Cache) */
797 /* L3 Data Cache per GPU */
850 /* L2 Data Cache per GPU (Total Tex Cache) */
903 /* L2 Data Cache per GPU (Total Tex Cache) */
956 /* L2 Data Cache per GPU (Total Tex Cache) */
1009 /* L2 Data Cache per GPU (Total Tex Cache) */
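The many "L2 Data Cache per GPU (Total Tex Cache)" hits above all come from the per-ASIC static cache tables in this file; each ASIC describes its caches as an array of small descriptors. A rough sketch of one such entry, with field names taken from the pcache_info accesses shown further down and the numeric values purely illustrative:

    struct kfd_gpu_cache_info {
    	uint32_t cache_size;		/* KiB */
    	uint32_t cache_level;		/* 1, 2 or 3 */
    	uint32_t cache_line_size;	/* bytes */
    	uint32_t flags;			/* CRAT_CACHE_FLAGS_* */
    	uint32_t num_cu_shared;		/* CUs sharing one cache instance */
    };

    static struct kfd_gpu_cache_info example_cache_info[] = {
    	{
    		/* L2 Data Cache per GPU (Total Tex Cache) */
    		.cache_size = 4096,	/* illustrative value */
    		.cache_level = 2,
    		.cache_line_size = 64,
    		.flags = (CRAT_CACHE_FLAGS_ENABLED |
    			  CRAT_CACHE_FLAGS_DATA_CACHE |
    			  CRAT_CACHE_FLAGS_SIMD_CACHE),
    		.num_cu_shared = 10,	/* e.g. all CUs in a shader array */
    	},
    };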
1023 dev->node_props.cpu_cores_count = cu->num_cpu_cores; in kfd_populated_cu_info_cpu()
1024 dev->node_props.cpu_core_id_base = cu->processor_id_low; in kfd_populated_cu_info_cpu()
1025 if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) in kfd_populated_cu_info_cpu()
1026 dev->node_props.capability |= HSA_CAP_ATS_PRESENT; in kfd_populated_cu_info_cpu()
1028 pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, in kfd_populated_cu_info_cpu()
1029 cu->processor_id_low); in kfd_populated_cu_info_cpu()
1035 dev->node_props.simd_id_base = cu->processor_id_low; in kfd_populated_cu_info_gpu()
1036 dev->node_props.simd_count = cu->num_simd_cores; in kfd_populated_cu_info_gpu()
1037 dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; in kfd_populated_cu_info_gpu()
1038 dev->node_props.max_waves_per_simd = cu->max_waves_simd; in kfd_populated_cu_info_gpu()
1039 dev->node_props.wave_front_size = cu->wave_front_size; in kfd_populated_cu_info_gpu()
1040 dev->node_props.array_count = cu->array_count; in kfd_populated_cu_info_gpu()
1041 dev->node_props.cu_per_simd_array = cu->num_cu_per_array; in kfd_populated_cu_info_gpu()
1042 dev->node_props.simd_per_cu = cu->num_simd_per_cu; in kfd_populated_cu_info_gpu()
1043 dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; in kfd_populated_cu_info_gpu()
1044 if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) in kfd_populated_cu_info_gpu()
1045 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; in kfd_populated_cu_info_gpu()
1046 pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); in kfd_populated_cu_info_gpu()
1049 /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
1058 cu->proximity_domain, cu->hsa_capability); in kfd_parse_subtype_cu()
1060 if (cu->proximity_domain == dev->proximity_domain) { in kfd_parse_subtype_cu()
1061 if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) in kfd_parse_subtype_cu()
1064 if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) in kfd_parse_subtype_cu()
1079 list_for_each_entry(props, &dev->mem_props, list) { in find_subtype_mem()
1080 if (props->heap_type == heap_type in find_subtype_mem()
1081 && props->flags == flags in find_subtype_mem()
1082 && props->width == width) in find_subtype_mem()
1088 /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
1102 mem->proximity_domain); in kfd_parse_subtype_mem()
1104 if (mem->proximity_domain == dev->proximity_domain) { in kfd_parse_subtype_mem()
1105 /* We're on GPU node */ in kfd_parse_subtype_mem()
1106 if (dev->node_props.cpu_cores_count == 0) { in kfd_parse_subtype_mem()
1108 if (mem->visibility_type == 0) in kfd_parse_subtype_mem()
1113 heap_type = mem->visibility_type; in kfd_parse_subtype_mem()
1117 if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) in kfd_parse_subtype_mem()
1119 if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) in kfd_parse_subtype_mem()
1123 ((uint64_t)mem->length_high << 32) + in kfd_parse_subtype_mem()
1124 mem->length_low; in kfd_parse_subtype_mem()
1125 width = mem->width; in kfd_parse_subtype_mem()
1134 props->size_in_bytes += size_in_bytes; in kfd_parse_subtype_mem()
1140 return -ENOMEM; in kfd_parse_subtype_mem()
1142 props->heap_type = heap_type; in kfd_parse_subtype_mem()
1143 props->flags = flags; in kfd_parse_subtype_mem()
1144 props->size_in_bytes = size_in_bytes; in kfd_parse_subtype_mem()
1145 props->width = width; in kfd_parse_subtype_mem()
1147 dev->node_props.mem_banks_count++; in kfd_parse_subtype_mem()
1148 list_add_tail(&props->list, &dev->mem_props); in kfd_parse_subtype_mem()
1157 /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
1165 uint32_t id; in kfd_parse_subtype_cache() local
1168 id = cache->processor_id_low; in kfd_parse_subtype_cache()
1170 pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); in kfd_parse_subtype_cache()
1172 total_num_of_cu = (dev->node_props.array_count * in kfd_parse_subtype_cache()
1173 dev->node_props.cu_per_simd_array); in kfd_parse_subtype_cache()
1176 * information as it is associated with a CPU core or GPU in kfd_parse_subtype_cache()
1177 * Compute Unit. So map the cache using CPU core Id or SIMD in kfd_parse_subtype_cache()
1178 * (GPU) ID. in kfd_parse_subtype_cache()
1183 if ((id >= dev->node_props.cpu_core_id_base && in kfd_parse_subtype_cache()
1184 id <= dev->node_props.cpu_core_id_base + in kfd_parse_subtype_cache()
1185 dev->node_props.cpu_cores_count) || in kfd_parse_subtype_cache()
1186 (id >= dev->node_props.simd_id_base && in kfd_parse_subtype_cache()
1187 id < dev->node_props.simd_id_base + in kfd_parse_subtype_cache()
1191 return -ENOMEM; in kfd_parse_subtype_cache()
1193 props->processor_id_low = id; in kfd_parse_subtype_cache()
1194 props->cache_level = cache->cache_level; in kfd_parse_subtype_cache()
1195 props->cache_size = cache->cache_size; in kfd_parse_subtype_cache()
1196 props->cacheline_size = cache->cache_line_size; in kfd_parse_subtype_cache()
1197 props->cachelines_per_tag = cache->lines_per_tag; in kfd_parse_subtype_cache()
1198 props->cache_assoc = cache->associativity; in kfd_parse_subtype_cache()
1199 props->cache_latency = cache->cache_latency; in kfd_parse_subtype_cache()
1201 memcpy(props->sibling_map, cache->sibling_map, in kfd_parse_subtype_cache()
1205 props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; in kfd_parse_subtype_cache()
1207 if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) in kfd_parse_subtype_cache()
1208 props->cache_type |= HSA_CACHE_TYPE_DATA; in kfd_parse_subtype_cache()
1209 if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) in kfd_parse_subtype_cache()
1210 props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in kfd_parse_subtype_cache()
1211 if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) in kfd_parse_subtype_cache()
1212 props->cache_type |= HSA_CACHE_TYPE_CPU; in kfd_parse_subtype_cache()
1213 if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) in kfd_parse_subtype_cache()
1214 props->cache_type |= HSA_CACHE_TYPE_HSACU; in kfd_parse_subtype_cache()
1216 dev->node_props.caches_count++; in kfd_parse_subtype_cache()
1217 list_add_tail(&props->list, &dev->cache_props); in kfd_parse_subtype_cache()
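As the comment at 1176-1178 says, a CRAT cache entry carries no proximity domain of its own, so kfd_parse_subtype_cache() attaches it to the topology device whose CPU-core or SIMD (GPU) ID range contains the cache's processor_id_low (the checks at 1183-1187). The same test, pulled out into a standalone helper purely for illustration (this helper does not exist in the driver):

    /* Hypothetical helper: does cache processor id 'id' belong to 'dev'? */
    static bool kfd_cache_id_belongs_to_dev(uint32_t id,
    					struct kfd_topology_device *dev)
    {
    	uint32_t total_num_of_cu = dev->node_props.array_count *
    				   dev->node_props.cu_per_simd_array;

    	/* CPU caches fall inside the node's CPU core ID range ... */
    	if (id >= dev->node_props.cpu_core_id_base &&
    	    id <= dev->node_props.cpu_core_id_base +
    		  dev->node_props.cpu_cores_count)
    		return true;

    	/* ... GPU caches inside the SIMD ID range covering its CUs */
    	return id >= dev->node_props.simd_id_base &&
    	       id < dev->node_props.simd_id_base + total_num_of_cu;
    }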
1226 /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
1237 id_from = iolink->proximity_domain_from; in kfd_parse_subtype_iolink()
1238 id_to = iolink->proximity_domain_to; in kfd_parse_subtype_iolink()
1243 if (id_from == dev->proximity_domain) { in kfd_parse_subtype_iolink()
1246 return -ENOMEM; in kfd_parse_subtype_iolink()
1248 props->node_from = id_from; in kfd_parse_subtype_iolink()
1249 props->node_to = id_to; in kfd_parse_subtype_iolink()
1250 props->ver_maj = iolink->version_major; in kfd_parse_subtype_iolink()
1251 props->ver_min = iolink->version_minor; in kfd_parse_subtype_iolink()
1252 props->iolink_type = iolink->io_interface_type; in kfd_parse_subtype_iolink()
1254 if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) in kfd_parse_subtype_iolink()
1255 props->weight = 20; in kfd_parse_subtype_iolink()
1256 else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) in kfd_parse_subtype_iolink()
1257 props->weight = iolink->weight_xgmi; in kfd_parse_subtype_iolink()
1259 props->weight = node_distance(id_from, id_to); in kfd_parse_subtype_iolink()
1261 props->min_latency = iolink->minimum_latency; in kfd_parse_subtype_iolink()
1262 props->max_latency = iolink->maximum_latency; in kfd_parse_subtype_iolink()
1263 props->min_bandwidth = iolink->minimum_bandwidth_mbs; in kfd_parse_subtype_iolink()
1264 props->max_bandwidth = iolink->maximum_bandwidth_mbs; in kfd_parse_subtype_iolink()
1265 props->rec_transfer_size = in kfd_parse_subtype_iolink()
1266 iolink->recommended_transfer_size; in kfd_parse_subtype_iolink()
1268 dev->node_props.io_links_count++; in kfd_parse_subtype_iolink()
1269 list_add_tail(&props->list, &dev->io_link_props); in kfd_parse_subtype_iolink()
1274 /* CPU topology is created before GPUs are detected, so CPU->GPU in kfd_parse_subtype_iolink()
1276 * means a GPU is detected and we are adding GPU->CPU to the topology. in kfd_parse_subtype_iolink()
1277 * At this time, also add the corresponding CPU->GPU link if GPU in kfd_parse_subtype_iolink()
1282 if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { in kfd_parse_subtype_iolink()
1285 return -ENODEV; in kfd_parse_subtype_iolink()
1289 return -ENOMEM; in kfd_parse_subtype_iolink()
1291 props2->node_from = id_to; in kfd_parse_subtype_iolink()
1292 props2->node_to = id_from; in kfd_parse_subtype_iolink()
1293 props2->kobj = NULL; in kfd_parse_subtype_iolink()
1294 to_dev->node_props.io_links_count++; in kfd_parse_subtype_iolink()
1295 list_add_tail(&props2->list, &to_dev->io_link_props); in kfd_parse_subtype_iolink()
1301 /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
1303 * @sub_type_hdr - subtype section of crat_image
1304 * @device_list - list of topology devices present in this crat_image
1315 switch (sub_type_hdr->type) { in kfd_parse_subtype()
1346 sub_type_hdr->type); in kfd_parse_subtype()
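kfd_parse_subtype() itself is just a dispatcher: it switches on sub_type_hdr->type and forwards the entry to the matching parser above, warning on anything it does not recognize (the pr_warn whose argument shows up at 1346). A sketch of that shape; the exact set of handled subtypes is an assumption here:

    static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
    			     struct list_head *device_list)
    {
    	switch (sub_type_hdr->type) {
    	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
    		return kfd_parse_subtype_cu(
    			(struct crat_subtype_computeunit *)sub_type_hdr,
    			device_list);
    	case CRAT_SUBTYPE_MEMORY_AFFINITY:
    		return kfd_parse_subtype_mem(
    			(struct crat_subtype_memory *)sub_type_hdr,
    			device_list);
    	case CRAT_SUBTYPE_CACHE_AFFINITY:
    		return kfd_parse_subtype_cache(
    			(struct crat_subtype_cache *)sub_type_hdr,
    			device_list);
    	case CRAT_SUBTYPE_IOLINK_AFFINITY:
    		return kfd_parse_subtype_iolink(
    			(struct crat_subtype_iolink *)sub_type_hdr,
    			device_list);
    	default:
    		pr_warn("Unknown CRAT subtype %d\n", sub_type_hdr->type);
    		return 0;
    	}
    }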
1352 /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
1355 * @crat_image - input image containing CRAT
1356 * @device_list - [OUT] list of kfd_topology_device generated after
1358 * @proximity_domain - Proximity domain of the first device in the table
1360 * Return - 0 if successful else -ve value
1374 return -EINVAL; in kfd_parse_crat_table()
1378 return -EINVAL; in kfd_parse_crat_table()
1381 num_nodes = crat_table->num_domains; in kfd_parse_crat_table()
1382 image_len = crat_table->length; in kfd_parse_crat_table()
1390 top_dev->proximity_domain = proximity_domain++; in kfd_parse_crat_table()
1394 ret = -ENOMEM; in kfd_parse_crat_table()
1398 memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); in kfd_parse_crat_table()
1399 memcpy(top_dev->oem_table_id, crat_table->oem_table_id, in kfd_parse_crat_table()
1401 top_dev->oem_revision = crat_table->oem_revision; in kfd_parse_crat_table()
1406 if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { in kfd_parse_crat_table()
1413 sub_type_hdr->length); in kfd_parse_crat_table()
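The table parse is a length-driven walk: validate the crat_header, read num_domains and length, then step a crat_subtype_generic pointer forward by each entry's length, handing every enabled entry (flags & CRAT_SUBTYPE_FLAGS_ENABLED, line 1406) to kfd_parse_subtype(). A condensed sketch of that loop, with device-list creation and error handling left out:

    struct crat_header *crat_table = (struct crat_header *)crat_image;
    struct crat_subtype_generic *sub_type_hdr;
    int ret = 0;

    sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
    while ((char *)sub_type_hdr < (char *)crat_image + crat_table->length) {
    	if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
    		ret = kfd_parse_subtype(sub_type_hdr, device_list);
    		if (ret)
    			break;
    	}
    	sub_type_hdr = (struct crat_subtype_generic *)
    			((char *)sub_type_hdr + sub_type_hdr->length);
    }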
1427 struct amdgpu_device *adev = kdev->adev; in kfd_fill_gpu_cache_info_from_gfx_config()
1431 if (adev->gfx.config.gc_tcp_l1_size) { in kfd_fill_gpu_cache_info_from_gfx_config()
1432 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1437 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1438 pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1442 if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config()
1444 adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config()
1449 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1450 pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1454 if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config()
1455 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config()
1460 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1461 pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1465 if (adev->gfx.config.gc_gl1c_per_sa && in kfd_fill_gpu_cache_info_from_gfx_config()
1466 adev->gfx.config.gc_gl1c_size_per_instance) { in kfd_fill_gpu_cache_info_from_gfx_config()
1467 pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa * in kfd_fill_gpu_cache_info_from_gfx_config()
1468 adev->gfx.config.gc_gl1c_size_per_instance; in kfd_fill_gpu_cache_info_from_gfx_config()
1473 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1477 /* L2 Data Cache per GPU (Total Tex Cache) */ in kfd_fill_gpu_cache_info_from_gfx_config()
1478 if (adev->gfx.config.gc_gl2c_per_gpu) { in kfd_fill_gpu_cache_info_from_gfx_config()
1479 pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu; in kfd_fill_gpu_cache_info_from_gfx_config()
1484 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1485 pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1488 /* L3 Data Cache per GPU */ in kfd_fill_gpu_cache_info_from_gfx_config()
1489 if (adev->gmc.mall_size) { in kfd_fill_gpu_cache_info_from_gfx_config()
1490 pcache_info[i].cache_size = adev->gmc.mall_size / 1024; in kfd_fill_gpu_cache_info_from_gfx_config()
1495 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1505 struct amdgpu_device *adev = kdev->adev; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1509 if (adev->gfx.config.gc_tcp_size_per_cu) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1510 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1519 if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1521 adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1526 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1530 if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1531 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1536 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1539 /* L2 Data Cache per GPU (Total Tex Cache) */ in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1540 if (adev->gfx.config.gc_tcc_size) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1541 pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1546 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1549 /* L3 Data Cache per GPU */ in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1550 if (adev->gmc.mall_size) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1551 pcache_info[i].cache_size = adev->gmc.mall_size / 1024; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1556 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1566 switch (kdev->adev->asic_type) { in kfd_get_gpu_cache_info()
1625 kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, in kfd_get_gpu_cache_info()
1691 kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); in kfd_get_gpu_cache_info()
1705 * following amount is allocated for GPU Virtual CRAT. This is
1711 /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
1713 * @numa_node_id: CPU NUMA node id
1717 * Return 0 if successful else return -ve value
1725 *avail_size -= sizeof(struct crat_subtype_computeunit); in kfd_fill_cu_for_cpu()
1727 return -ENOMEM; in kfd_fill_cu_for_cpu()
1732 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; in kfd_fill_cu_for_cpu()
1733 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); in kfd_fill_cu_for_cpu()
1734 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_cu_for_cpu()
1739 sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; in kfd_fill_cu_for_cpu()
1740 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_cu_for_cpu()
1741 sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); in kfd_fill_cu_for_cpu()
1742 if (sub_type_hdr->processor_id_low == -1) in kfd_fill_cu_for_cpu()
1743 return -EINVAL; in kfd_fill_cu_for_cpu()
1745 sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); in kfd_fill_cu_for_cpu()
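All of the kfd_fill_* helpers in this part of the file share the same space-budget discipline before touching the buffer: subtract the subtype's size from *avail_size, return -ENOMEM if the fixed-size virtual CRAT image would overflow, then memset the entry and fill in its type, length and the ENABLED flag. The pattern in isolation (the helper name is made up; it is not a function in the driver):

    /* Illustrative only: the bookkeeping every kfd_fill_* helper repeats */
    static int kfd_reserve_subtype(int *avail_size,
    			       struct crat_subtype_generic *hdr,
    			       uint8_t type, uint8_t length)
    {
    	*avail_size -= length;
    	if (*avail_size < 0)
    		return -ENOMEM;

    	memset(hdr, 0, length);
    	hdr->type = type;
    	hdr->length = length;
    	hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
    	return 0;
    }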
1750 /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
1752 * @numa_node_id: CPU NUMA node id
1756 * Return 0 if successful else return -ve value
1766 *avail_size -= sizeof(struct crat_subtype_memory); in kfd_fill_mem_info_for_cpu()
1768 return -ENOMEM; in kfd_fill_mem_info_for_cpu()
1773 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; in kfd_fill_mem_info_for_cpu()
1774 sub_type_hdr->length = sizeof(struct crat_subtype_memory); in kfd_fill_mem_info_for_cpu()
1775 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_mem_info_for_cpu()
1785 mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]); in kfd_fill_mem_info_for_cpu()
1788 sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); in kfd_fill_mem_info_for_cpu()
1789 sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); in kfd_fill_mem_info_for_cpu()
1790 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_mem_info_for_cpu()
1804 if (c->x86_vendor == X86_VENDOR_AMD) in kfd_fill_iolink_info_for_cpu()
1816 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_iolink_info_for_cpu()
1818 return -ENOMEM; in kfd_fill_iolink_info_for_cpu()
1823 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_iolink_info_for_cpu()
1824 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_iolink_info_for_cpu()
1825 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_iolink_info_for_cpu()
1828 sub_type_hdr->proximity_domain_from = numa_node_id; in kfd_fill_iolink_info_for_cpu()
1829 sub_type_hdr->proximity_domain_to = nid; in kfd_fill_iolink_info_for_cpu()
1830 sub_type_hdr->io_interface_type = link_type; in kfd_fill_iolink_info_for_cpu()
1840 /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
1860 return -EINVAL; in kfd_create_vcrat_image_cpu()
1865 avail_size -= sizeof(struct crat_header); in kfd_create_vcrat_image_cpu()
1867 return -ENOMEM; in kfd_create_vcrat_image_cpu()
1870 memcpy(&crat_table->signature, CRAT_SIGNATURE, in kfd_create_vcrat_image_cpu()
1871 sizeof(crat_table->signature)); in kfd_create_vcrat_image_cpu()
1872 crat_table->length = sizeof(struct crat_header); in kfd_create_vcrat_image_cpu()
1878 crat_table->oem_revision = acpi_table->revision; in kfd_create_vcrat_image_cpu()
1879 memcpy(crat_table->oem_id, acpi_table->oem_id, in kfd_create_vcrat_image_cpu()
1881 memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, in kfd_create_vcrat_image_cpu()
1885 crat_table->total_entries = 0; in kfd_create_vcrat_image_cpu()
1886 crat_table->num_domains = 0; in kfd_create_vcrat_image_cpu()
1891 if (kfd_numa_node_to_apic_id(numa_node_id) == -1) in kfd_create_vcrat_image_cpu()
1896 crat_table->num_domains, in kfd_create_vcrat_image_cpu()
1900 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_cpu()
1901 crat_table->total_entries++; in kfd_create_vcrat_image_cpu()
1904 sub_type_hdr->length); in kfd_create_vcrat_image_cpu()
1908 crat_table->num_domains, in kfd_create_vcrat_image_cpu()
1912 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_cpu()
1913 crat_table->total_entries++; in kfd_create_vcrat_image_cpu()
1916 sub_type_hdr->length); in kfd_create_vcrat_image_cpu()
1927 crat_table->length += (sub_type_hdr->length * entries); in kfd_create_vcrat_image_cpu()
1928 crat_table->total_entries += entries; in kfd_create_vcrat_image_cpu()
1931 sub_type_hdr->length * entries); in kfd_create_vcrat_image_cpu()
1937 crat_table->num_domains++; in kfd_create_vcrat_image_cpu()
1948 *size = crat_table->length; in kfd_create_vcrat_image_cpu()
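Putting the pieces together, kfd_create_vcrat_image_cpu() walks every online NUMA node with a valid APIC id and, for each one, appends a compute-unit subtype, a memory subtype and one IO-link subtype per peer node, bumping crat_table->length, total_entries and num_domains and advancing sub_type_hdr by the length just written (the bookkeeping visible at 1900-1937). A compressed sketch of one iteration, with return-value checks omitted and helper signatures approximated from the doc comments above:

    for_each_online_node(numa_node_id) {
    	if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
    		continue;

    	/* Compute-unit subtype for this NUMA node */
    	kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
    			    crat_table->num_domains,
    			    (struct crat_subtype_computeunit *)sub_type_hdr);
    	crat_table->length += sub_type_hdr->length;
    	crat_table->total_entries++;
    	sub_type_hdr = (void *)sub_type_hdr + sub_type_hdr->length;

    	/* Memory subtype for this NUMA node */
    	kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
    				  crat_table->num_domains,
    				  (struct crat_subtype_memory *)sub_type_hdr);
    	crat_table->length += sub_type_hdr->length;
    	crat_table->total_entries++;
    	sub_type_hdr = (void *)sub_type_hdr + sub_type_hdr->length;

    	/* One IO link to every other NUMA node */
    	kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, &entries,
    				     (struct crat_subtype_iolink *)sub_type_hdr);
    	crat_table->length += sub_type_hdr->length * entries;
    	crat_table->total_entries += entries;
    	sub_type_hdr = (void *)sub_type_hdr + sub_type_hdr->length * entries;

    	crat_table->num_domains++;
    }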
1960 *avail_size -= sizeof(struct crat_subtype_memory); in kfd_fill_gpu_memory_affinity()
1962 return -ENOMEM; in kfd_fill_gpu_memory_affinity()
1965 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; in kfd_fill_gpu_memory_affinity()
1966 sub_type_hdr->length = sizeof(struct crat_subtype_memory); in kfd_fill_gpu_memory_affinity()
1967 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_gpu_memory_affinity()
1969 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_gpu_memory_affinity()
1971 pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n", in kfd_fill_gpu_memory_affinity()
1974 sub_type_hdr->length_low = lower_32_bits(size); in kfd_fill_gpu_memory_affinity()
1975 sub_type_hdr->length_high = upper_32_bits(size); in kfd_fill_gpu_memory_affinity()
1977 sub_type_hdr->width = local_mem_info->vram_width; in kfd_fill_gpu_memory_affinity()
1978 sub_type_hdr->visibility_type = type; in kfd_fill_gpu_memory_affinity()
1989 u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 | in kfd_find_numa_node_in_srat()
1990 pci_dev_id(kdev->adev->pdev); in kfd_find_numa_node_in_srat()
1994 struct acpi_srat_generic_affinity *gpu; in kfd_find_numa_node_in_srat() local
2010 table_end = (unsigned long)table_header + table_header->length; in kfd_find_numa_node_in_srat()
2016 subtable_len = sub_header->length; in kfd_find_numa_node_in_srat()
2028 switch (sub_header->type) { in kfd_find_numa_node_in_srat()
2031 pxm = *((u32 *)cpu->proximity_domain_hi) << 8 | in kfd_find_numa_node_in_srat()
2032 cpu->proximity_domain_lo; in kfd_find_numa_node_in_srat()
2037 gpu = (struct acpi_srat_generic_affinity *)sub_header; in kfd_find_numa_node_in_srat()
2038 bdf = *((u16 *)(&gpu->device_handle[0])) << 16 | in kfd_find_numa_node_in_srat()
2039 *((u16 *)(&gpu->device_handle[2])); in kfd_find_numa_node_in_srat()
2042 numa_node = pxm_to_node(gpu->proximity_domain); in kfd_find_numa_node_in_srat()
2054 subtable_len = sub_header->length; in kfd_find_numa_node_in_srat()
2059 /* Workaround bad cpu-gpu binding case */ in kfd_find_numa_node_in_srat()
2065 set_dev_node(&kdev->adev->pdev->dev, numa_node); in kfd_find_numa_node_in_srat()
2072 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
2075 * @kdev - [IN] GPU device
2077 * @proximity_domain - proximity domain of the GPU node
2079 * Return 0 if successful else return -ve value
2086 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_direct_io_link_to_cpu()
2088 return -ENOMEM; in kfd_fill_gpu_direct_io_link_to_cpu()
2093 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_gpu_direct_io_link_to_cpu()
2094 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_direct_io_link_to_cpu()
2095 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_gpu_direct_io_link_to_cpu()
2097 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; in kfd_fill_gpu_direct_io_link_to_cpu()
2100 * TODO: Fill-in other fields of iolink subtype in kfd_fill_gpu_direct_io_link_to_cpu()
2102 if (kdev->adev->gmc.xgmi.connected_to_cpu || in kfd_fill_gpu_direct_io_link_to_cpu()
2104 kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) == in kfd_fill_gpu_direct_io_link_to_cpu()
2110 kdev->adev, NULL, true) : mem_bw; in kfd_fill_gpu_direct_io_link_to_cpu()
2113 * with host gpu xgmi link, host can access gpu memory whether in kfd_fill_gpu_direct_io_link_to_cpu()
2117 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; in kfd_fill_gpu_direct_io_link_to_cpu()
2118 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; in kfd_fill_gpu_direct_io_link_to_cpu()
2119 sub_type_hdr->weight_xgmi = weight; in kfd_fill_gpu_direct_io_link_to_cpu()
2120 sub_type_hdr->minimum_bandwidth_mbs = bandwidth; in kfd_fill_gpu_direct_io_link_to_cpu()
2121 sub_type_hdr->maximum_bandwidth_mbs = bandwidth; in kfd_fill_gpu_direct_io_link_to_cpu()
2123 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; in kfd_fill_gpu_direct_io_link_to_cpu()
2124 sub_type_hdr->minimum_bandwidth_mbs = in kfd_fill_gpu_direct_io_link_to_cpu()
2125 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true); in kfd_fill_gpu_direct_io_link_to_cpu()
2126 sub_type_hdr->maximum_bandwidth_mbs = in kfd_fill_gpu_direct_io_link_to_cpu()
2127 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false); in kfd_fill_gpu_direct_io_link_to_cpu()
2130 sub_type_hdr->proximity_domain_from = proximity_domain; in kfd_fill_gpu_direct_io_link_to_cpu()
2133 if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE && in kfd_fill_gpu_direct_io_link_to_cpu()
2138 if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) in kfd_fill_gpu_direct_io_link_to_cpu()
2139 sub_type_hdr->proximity_domain_to = 0; in kfd_fill_gpu_direct_io_link_to_cpu()
2141 sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node; in kfd_fill_gpu_direct_io_link_to_cpu()
2143 sub_type_hdr->proximity_domain_to = 0; in kfd_fill_gpu_direct_io_link_to_cpu()
2155 bool use_ta_info = kdev->kfd->num_nodes == 1; in kfd_fill_gpu_xgmi_link_to_gpu()
2157 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_xgmi_link_to_gpu()
2159 return -ENOMEM; in kfd_fill_gpu_xgmi_link_to_gpu()
2163 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_gpu_xgmi_link_to_gpu()
2164 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_xgmi_link_to_gpu()
2165 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED | in kfd_fill_gpu_xgmi_link_to_gpu()
2168 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; in kfd_fill_gpu_xgmi_link_to_gpu()
2169 sub_type_hdr->proximity_domain_from = proximity_domain_from; in kfd_fill_gpu_xgmi_link_to_gpu()
2170 sub_type_hdr->proximity_domain_to = proximity_domain_to; in kfd_fill_gpu_xgmi_link_to_gpu()
2173 sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT * in kfd_fill_gpu_xgmi_link_to_gpu()
2174 amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); in kfd_fill_gpu_xgmi_link_to_gpu()
2175 sub_type_hdr->maximum_bandwidth_mbs = in kfd_fill_gpu_xgmi_link_to_gpu()
2176 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, in kfd_fill_gpu_xgmi_link_to_gpu()
2177 peer_kdev->adev, false); in kfd_fill_gpu_xgmi_link_to_gpu()
2178 sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? in kfd_fill_gpu_xgmi_link_to_gpu()
2179 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2181 bool is_single_hop = kdev->kfd == peer_kdev->kfd; in kfd_fill_gpu_xgmi_link_to_gpu()
2186 sub_type_hdr->weight_xgmi = weight; in kfd_fill_gpu_xgmi_link_to_gpu()
2187 sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2188 sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2194 /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
2196 * @pcrat_image: Fill in VCRAT for GPU
2205 struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; in kfd_create_vcrat_image_gpu()
2206 struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; in kfd_create_vcrat_image_gpu()
2217 return -EINVAL; in kfd_create_vcrat_image_gpu()
2222 avail_size -= sizeof(struct crat_header); in kfd_create_vcrat_image_gpu()
2225 memcpy(&crat_table->signature, CRAT_SIGNATURE, in kfd_create_vcrat_image_gpu()
2226 sizeof(crat_table->signature)); in kfd_create_vcrat_image_gpu()
2228 crat_table->length = sizeof(struct crat_header); in kfd_create_vcrat_image_gpu()
2229 crat_table->num_domains = 1; in kfd_create_vcrat_image_gpu()
2230 crat_table->total_entries = 0; in kfd_create_vcrat_image_gpu()
2235 avail_size -= sizeof(struct crat_subtype_computeunit); in kfd_create_vcrat_image_gpu()
2239 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; in kfd_create_vcrat_image_gpu()
2240 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); in kfd_create_vcrat_image_gpu()
2241 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_create_vcrat_image_gpu()
2245 cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; in kfd_create_vcrat_image_gpu()
2246 cu->proximity_domain = proximity_domain; in kfd_create_vcrat_image_gpu()
2248 cu->num_simd_per_cu = cu_info->simd_per_cu; in kfd_create_vcrat_image_gpu()
2249 cu->num_simd_cores = cu_info->simd_per_cu * in kfd_create_vcrat_image_gpu()
2250 (cu_info->number / kdev->kfd->num_nodes); in kfd_create_vcrat_image_gpu()
2251 cu->max_waves_simd = cu_info->max_waves_per_simd; in kfd_create_vcrat_image_gpu()
2253 cu->wave_front_size = cu_info->wave_front_size; in kfd_create_vcrat_image_gpu()
2254 cu->array_count = gfx_info->max_sh_per_se * in kfd_create_vcrat_image_gpu()
2255 gfx_info->max_shader_engines; in kfd_create_vcrat_image_gpu()
2256 total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh); in kfd_create_vcrat_image_gpu()
2257 cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); in kfd_create_vcrat_image_gpu()
2258 cu->num_cu_per_array = gfx_info->max_cu_per_sh; in kfd_create_vcrat_image_gpu()
2259 cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu; in kfd_create_vcrat_image_gpu()
2260 cu->num_banks = gfx_info->max_shader_engines; in kfd_create_vcrat_image_gpu()
2261 cu->lds_size_in_kb = cu_info->lds_size; in kfd_create_vcrat_image_gpu()
2263 cu->hsa_capability = 0; in kfd_create_vcrat_image_gpu()
2265 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2266 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2273 local_mem_info = kdev->local_mem_info; in kfd_create_vcrat_image_gpu()
2275 sub_type_hdr->length); in kfd_create_vcrat_image_gpu()
2277 if (kdev->adev->debug_largebar) in kfd_create_vcrat_image_gpu()
2298 crat_table->length += sizeof(struct crat_subtype_memory); in kfd_create_vcrat_image_gpu()
2299 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2302 * Only direct links are added here which is Link from GPU to in kfd_create_vcrat_image_gpu()
2306 sub_type_hdr->length); in kfd_create_vcrat_image_gpu()
2313 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2314 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2318 * Direct links from GPU to other GPUs through xGMI. in kfd_create_vcrat_image_gpu()
2321 * hive id (from this GPU to other GPU). The reversed iolink in kfd_create_vcrat_image_gpu()
2322 * (from other GPU to this GPU) will be added in kfd_create_vcrat_image_gpu()
2325 if (kdev->kfd->hive_id) { in kfd_create_vcrat_image_gpu()
2328 if (!peer_dev->gpu) in kfd_create_vcrat_image_gpu()
2330 if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id) in kfd_create_vcrat_image_gpu()
2336 &avail_size, kdev, peer_dev->gpu, in kfd_create_vcrat_image_gpu()
2341 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2342 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2345 *size = crat_table->length; in kfd_create_vcrat_image_gpu()
2346 pr_info("Virtual CRAT table created for GPU\n"); in kfd_create_vcrat_image_gpu()
2351 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2359 * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2360 * COMPUTE_UNIT_GPU - Create VCRAT for GPU
2361 * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2362 * -- this option is not currently implemented.
2366 * Return 0 if successful else return -ve value
2377 return -EINVAL; in kfd_create_crat_image_virtual()
2382 * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. in kfd_create_crat_image_virtual()
2392 (num_nodes - 1) * sizeof(struct crat_subtype_iolink)); in kfd_create_crat_image_virtual()
2395 return -ENOMEM; in kfd_create_crat_image_virtual()
2402 return -EINVAL; in kfd_create_crat_image_virtual()
2405 return -ENOMEM; in kfd_create_crat_image_virtual()
2412 ret = -EINVAL; in kfd_create_crat_image_virtual()
2416 ret = -EINVAL; in kfd_create_crat_image_virtual()
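In use, the virtual-CRAT path is create / parse / destroy: build the image for the device class at hand, feed it to kfd_parse_crat_table() to generate topology devices, then free it. A hedged caller-side sketch for the CPU case (argument order follows the doc comments above; treat it as approximate):

    void *crat_image = NULL;
    size_t image_size = 0;
    struct list_head temp_topology_device_list;
    uint32_t proximity_domain = 0;
    int ret;

    INIT_LIST_HEAD(&temp_topology_device_list);

    /* CPU-only image; pass COMPUTE_UNIT_GPU and a kfd node for a dGPU */
    ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
    				    COMPUTE_UNIT_CPU, NULL,
    				    proximity_domain);
    if (!ret) {
    	ret = kfd_parse_crat_table(crat_image,
    				   &temp_topology_device_list,
    				   proximity_domain);
    	kfd_destroy_crat_image(crat_image);
    }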
2430 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)