
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2015-2022 Advanced Micro Devices, Inc.
 */

/*
 * @total_cu_count - Total CUs present in the GPU including ones
 */
	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
	dev->node_props.cpu_core_id_base = cu->processor_id_low;
	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;

	pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
		 cu->processor_id_low);

	dev->node_props.simd_id_base = cu->processor_id_low;
	dev->node_props.simd_count = cu->num_simd_cores;
	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
	dev->node_props.max_waves_per_simd = cu->max_waves_simd;
	dev->node_props.wave_front_size = cu->wave_front_size;
	dev->node_props.array_count = cu->array_count;
	dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
	dev->node_props.simd_per_cu = cu->num_simd_per_cu;
	dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
	if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
		dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
	pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);

/* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
 * topology device
 */

	pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
		 cu->proximity_domain, cu->hsa_capability);
	if (cu->proximity_domain == dev->proximity_domain) {
		if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
			kfd_populated_cu_info_cpu(dev, cu);
		if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
			kfd_populated_cu_info_gpu(dev, cu);
	}

	list_for_each_entry(props, &dev->mem_props, list) {
		if (props->heap_type == heap_type
				&& props->flags == flags
				&& props->width == width)
			return props;
	}

/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
 * topology device
 */
static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
				 struct list_head *device_list)
{
	pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
		 mem->proximity_domain);

	if (mem->proximity_domain == dev->proximity_domain) {
		/* GPU node: pick the heap from the CRAT visibility_type;
		 * CPU nodes always report system memory.
		 */
		if (dev->node_props.cpu_cores_count == 0) {
			if (mem->visibility_type == 0)
				heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE;
			else
				heap_type = mem->visibility_type;
		} else
			heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;

		if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
			flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
		if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
			flags |= HSA_MEM_FLAGS_NON_VOLATILE;

		size_in_bytes =
			((uint64_t)mem->length_high << 32) +
			mem->length_low;
		width = mem->width;

		/* Multiple banks of the same type are merged into one */
		props = find_subtype_mem(dev, heap_type, flags, width);
		if (props) {
			props->size_in_bytes += size_in_bytes;
			break;
		}

		props = kfd_alloc_struct(props);
		if (!props)
			return -ENOMEM;

		props->heap_type = heap_type;
		props->flags = flags;
		props->size_in_bytes = size_in_bytes;
		props->width = width;

		dev->node_props.mem_banks_count++;
		list_add_tail(&props->list, &dev->mem_props);
	}
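
/*
 * Worked example (illustration, not from the upstream file): CRAT carries
 * bank sizes as two 32-bit halves, so a 12 GiB bank arrives as
 * length_high = 0x3, length_low = 0x0 and is reassembled above as
 * ((uint64_t)0x3 << 32) + 0x0 = 12884901888 bytes. Firmware may describe one
 * physical DDR region as several such banks; find_subtype_mem() matches any
 * bank whose heap_type/flags/width triple is already present, which is why
 * the found case only bumps size_in_bytes and never allocates a new entry.
 */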

/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
 * topology device
 */

	id = cache->processor_id_low;

	total_num_of_cu = (dev->node_props.array_count *
			   dev->node_props.cu_per_simd_array);

	if ((id >= dev->node_props.cpu_core_id_base &&
	     id <= dev->node_props.cpu_core_id_base +
		   dev->node_props.cpu_cores_count) ||
	    (id >= dev->node_props.simd_id_base &&
	     id < dev->node_props.simd_id_base +
		  total_num_of_cu)) {
		props = kfd_alloc_struct(props);
		if (!props)
			return -ENOMEM;

		props->processor_id_low = id;
		props->cache_level = cache->cache_level;
		props->cache_size = cache->cache_size;
		props->cacheline_size = cache->cache_line_size;
		props->cachelines_per_tag = cache->lines_per_tag;
		props->cache_assoc = cache->associativity;
		props->cache_latency = cache->cache_latency;

		memcpy(props->sibling_map, cache->sibling_map,
		       CRAT_SIBLINGMAP_SIZE);

		/* set the sibling_map_size as 32 for CRAT from ACPI */
		props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;

		if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
			props->cache_type |= HSA_CACHE_TYPE_DATA;
		if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
			props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
		if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
			props->cache_type |= HSA_CACHE_TYPE_CPU;
		if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
			props->cache_type |= HSA_CACHE_TYPE_HSACU;

		dev->node_props.caches_count++;
		list_add_tail(&props->list, &dev->cache_props);
	}

/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
 * topology device
 */

	id_from = iolink->proximity_domain_from;
	id_to = iolink->proximity_domain_to;

	if (id_from == dev->proximity_domain) {
		props = kfd_alloc_struct(props);
		if (!props)
			return -ENOMEM;

		props->node_from = id_from;
		props->node_to = id_to;
		props->ver_maj = iolink->version_major;
		props->ver_min = iolink->version_minor;
		props->iolink_type = iolink->io_interface_type;

		if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
			props->weight = 20;
		else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
			props->weight = iolink->weight_xgmi;
		else
			props->weight = node_distance(id_from, id_to);

		props->min_latency = iolink->minimum_latency;
		props->max_latency = iolink->maximum_latency;
		props->min_bandwidth = iolink->minimum_bandwidth_mbs;
		props->max_bandwidth = iolink->maximum_bandwidth_mbs;
		props->rec_transfer_size =
				iolink->recommended_transfer_size;

		dev->node_props.io_links_count++;
		list_add_tail(&props->list, &dev->io_link_props);
	}

	/* CPU topology is created before GPUs are detected, so CPU->GPU
	 * links are not built at that time. If a PCIe type is discovered, it
	 * means a GPU is detected and we are adding GPU->CPU to the topology.
	 * At this time, also add the corresponding CPU->GPU link if the GPU
	 * has a large BAR.
	 */
	if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
		to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
		if (!to_dev)
			return -ENODEV;
		/* same everything but the other direction */
		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
		if (!props2)
			return -ENOMEM;

		props2->node_from = id_to;
		props2->node_to = id_from;
		props2->kobj = NULL;
		to_dev->node_props.io_links_count++;
		list_add_tail(&props2->list, &to_dev->io_link_props);
	}
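
/*
 * Topology sketch (illustration only): for a large-BAR GPU in proximity
 * domain 1 attached to the CPU in domain 0, the CRAT itself only holds the
 * GPU->CPU entry; the duplication above synthesizes the mirror image:
 *
 *	props:  node_from = 1, node_to = 0	(from the CRAT entry)
 *	props2: node_from = 0, node_to = 1	(generated, kobj reset to NULL)
 *
 * so both devices end up with a link in their io_link_props list.
 */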

/* kfd_parse_subtype - parse subtypes and attach it to correct topology device
 * present in the crat_image
 *
 * @sub_type_hdr - subtype section of crat_image
 * @device_list - list of topology devices present in this crat_image
 */

	struct crat_subtype_memory *mem;

	switch (sub_type_hdr->type) {
	case CRAT_SUBTYPE_MEMORY_AFFINITY:
		mem = (struct crat_subtype_memory *)sub_type_hdr;
		ret = kfd_parse_subtype_mem(mem, device_list);
		break;
	default:
		pr_warn("Unknown subtype %d in CRAT\n",
			sub_type_hdr->type);
	}
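
/*
 * Illustrative sketch (not upstream code) of the walk that feeds
 * kfd_parse_subtype(); it mirrors the loop in kfd_parse_crat_table() below.
 * Subtypes are variable-length records, so the cursor advances by each
 * header's own length field rather than by a fixed stride:
 *
 *	sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
 *	while ((char *)sub_type_hdr + sizeof(*sub_type_hdr) <
 *	       (char *)crat_image + image_len) {
 *		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED)
 *			ret = kfd_parse_subtype(sub_type_hdr, device_list);
 *		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
 *				sub_type_hdr->length);
 *	}
 */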

/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
 * create a kfd_topology_device and add it to device_list.
 *
 * @crat_image - input image containing CRAT
 * @device_list - [OUT] list of kfd_topology_device generated after
 *		  parsing crat_image
 * @proximity_domain - Proximity domain of the first device in the table
 *
 * Return - 0 if successful else -ve value
 */

	if (!crat_image)
		return -EINVAL;
	if (!list_empty(device_list))
		return -EINVAL;

	num_nodes = crat_table->num_domains;
	image_len = crat_table->length;

	top_dev = kfd_create_topology_device(device_list);
	if (!top_dev) {
		ret = -ENOMEM;
		goto err;
	}
	top_dev->proximity_domain = proximity_domain++;

	memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
	memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
	       CRAT_OEMTABLEID_LENGTH);
	top_dev->oem_revision = crat_table->oem_revision;

	if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
		ret = kfd_parse_subtype(sub_type_hdr, device_list);
		if (ret)
			break;
	}
	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

	struct amdgpu_device *adev = kdev->adev;
	int i = 0;

	/* TCP L1 cache, per CU */
	if (adev->gfx.config.gc_tcp_l1_size) {
		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
		i++;
	}

	/* Scalar L1 instruction cache, per SQC */
	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
		pcache_info[i].cache_size =
			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
		pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
		i++;
	}

	/* Scalar L1 data cache, per SQC */
	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
		pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
		i++;
	}

	/* GL1 data cache, per SA */
	if (adev->gfx.config.gc_gl1c_per_sa &&
	    adev->gfx.config.gc_gl1c_size_per_instance) {
		pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
			adev->gfx.config.gc_gl1c_size_per_instance;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}

	/* L2 (GL2C) cache, per GPU */
	if (adev->gfx.config.gc_gl2c_per_gpu) {
		pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
		i++;
	}

	/* L3 (MALL) cache, per GPU */
	if (adev->gmc.mall_size) {
		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
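
/*
 * Unit note (illustration, assuming the usual amdgpu conventions): the gc_*
 * cache sizes from the IP discovery table are already in KiB, while
 * adev->gmc.mall_size is in bytes, hence the / 1024 above. E.g. a 32 MiB
 * MALL reports mall_size = 33554432 and lands in the table as
 * cache_size = 32768 KiB, shared by every CU in the shader array.
 */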

	struct amdgpu_device *adev = kdev->adev;
	int i = 0;

	/* TCP L1 cache, per CU */
	if (adev->gfx.config.gc_tcp_size_per_cu) {
		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
		i++;
	}

	/* Scalar L1 instruction cache, per SQC */
	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
		pcache_info[i].cache_size =
			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
		i++;
	}

	/* Scalar L1 data cache, per SQC */
	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
		i++;
	}

	/* L2 (TCC) cache, per GPU */
	if (adev->gfx.config.gc_tcc_size) {
		pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}

	/* L3 (MALL) cache, per GPU */
	if (adev->gmc.mall_size) {
		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
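
/*
 * Note (hedged): this _v2 variant reads the same topology from the GC
 * 9.4.x-era config fields. gc_tcp_size_per_cu is already per-CU, so no
 * per-WGP division is needed, SQC caches are shared by gc_num_cu_per_sqc CUs
 * rather than a fixed two per WGP, and the L2 size comes from gc_tcc_size
 * instead of gc_gl2c_per_gpu. Both variants fall back to adev->gmc.mall_size
 * for the last-level entry.
 */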

	switch (kdev->adev->asic_type) {

			num_of_cache_types =
				kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
									   *pcache_info);

			num_of_cache_types =
				kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
									*pcache_info);

/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
 *
 * Return 0 if successful else return -ve value
 */

	*avail_size -= sizeof(struct crat_subtype_computeunit);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in CU data */
	sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
	sub_type_hdr->proximity_domain = proximity_domain;
	sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
	if (sub_type_hdr->processor_id_low == -1)
		return -EINVAL;

	sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);

/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
 *
 * Return 0 if successful else return -ve value
 */

	*avail_size -= sizeof(struct crat_subtype_memory);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in Memory Subunit data */
	pgdat = NODE_DATA(numa_node_id);
	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
	mem_in_bytes <<= PAGE_SHIFT;

	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
	sub_type_hdr->proximity_domain = proximity_domain;
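
/*
 * Worked example (illustration): zone_managed_pages() counts pages, so with
 * 4 KiB pages a node with 4194304 managed pages yields
 * mem_in_bytes = 4194304 << PAGE_SHIFT = 0x400000000 (16 GiB), which the
 * fixed-width CRAT fields carry as length_low = 0x0, length_high = 0x4.
 */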

	if (c->x86_vendor == X86_VENDOR_AMD)
		link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
	else
		link_type = CRAT_IOLINK_TYPE_QPI_1_1;

	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in IO link data */
	sub_type_hdr->proximity_domain_from = numa_node_id;
	sub_type_hdr->proximity_domain_to = nid;
	sub_type_hdr->io_interface_type = link_type;

/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
 */

	if (!pcrat_image)
		return -EINVAL;

	/* Fill in CRAT Header */
	avail_size -= sizeof(struct crat_header);
	if (avail_size < 0)
		return -ENOMEM;

	memcpy(&crat_table->signature, CRAT_SIGNATURE,
	       sizeof(crat_table->signature));
	crat_table->length = sizeof(struct crat_header);

	crat_table->oem_revision = acpi_table->revision;
	memcpy(crat_table->oem_id, acpi_table->oem_id,
	       CRAT_OEMID_LENGTH);
	memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
	       CRAT_OEMTABLEID_LENGTH);

	crat_table->total_entries = 0;
	crat_table->num_domains = 0;

	for_each_online_node(numa_node_id) {
		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
			continue;

		/* Fill in Subtype: Compute Unit */
		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
					  crat_table->num_domains,
					  (struct crat_subtype_computeunit *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);

		/* Fill in Subtype: Memory */
		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
						crat_table->num_domains,
						(struct crat_subtype_memory *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);

		/* Fill in Subtype: IO Link */
		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
						   &entries,
						   (struct crat_subtype_iolink *)sub_type_hdr);
		if (ret < 0)
			return ret;

		if (entries) {
			crat_table->length += (sub_type_hdr->length * entries);
			crat_table->total_entries += entries;

			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
					sub_type_hdr->length * entries);
		}

		crat_table->num_domains++;
	}

	*size = crat_table->length;
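
/*
 * Size bookkeeping sketch (illustration): for N online NUMA nodes the CPU
 * VCRAT grows to roughly
 *
 *	sizeof(struct crat_header)
 *	+ N * (sizeof(struct crat_subtype_computeunit)
 *	       + sizeof(struct crat_subtype_memory)
 *	       + (N - 1) * sizeof(struct crat_subtype_iolink))
 *
 * since each node emits one CU and one memory subtype plus an IO link to
 * every other node. kfd_create_crat_image_virtual() near the end of this
 * file sizes its allocation with the same formula.
 */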

	*avail_size -= sizeof(struct crat_subtype_memory);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in Memory Subunit data */
	sub_type_hdr->proximity_domain = proximity_domain;

	pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
		 type, size);

	sub_type_hdr->length_low = lower_32_bits(size);
	sub_type_hdr->length_high = upper_32_bits(size);

	sub_type_hdr->width = local_mem_info->vram_width;
	sub_type_hdr->visibility_type = type;

	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
		     pci_dev_id(kdev->adev->pdev);

	table_end = (unsigned long)table_header + table_header->length;

	subtable_len = sub_header->length;

	switch (sub_header->type) {
	case ACPI_SRAT_TYPE_CPU_AFFINITY:
		cpu = (struct acpi_srat_cpu_affinity *)sub_header;
		pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
		      cpu->proximity_domain_lo;
		if (pxm > max_pxm)
			max_pxm = pxm;
		break;
	case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
		gpu = (struct acpi_srat_generic_affinity *)sub_header;
		bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
		      *((u16 *)(&gpu->device_handle[2]));
		if (bdf == pci_id) {
			found = true;
			numa_node = pxm_to_node(gpu->proximity_domain);
		}
		break;
	}

	subtable_len = sub_header->length;

	/* Workaround bad cpu-gpu binding case */
	if (found && (numa_node < 0 ||
		      numa_node > pxm_to_node(max_pxm)))
		numa_node = 0;

	if (numa_node != NUMA_NO_NODE)
		set_dev_node(&kdev->adev->pdev->dev, numa_node);
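
/*
 * Encoding example (illustration, per the ACPI SRAT layout): pci_id packs
 * the device location as domain << 16 | bus << 8 | devfn, so a GPU at
 * 0000:03:00.0 gives pci_id = 0x00000300. The SRAT Generic Affinity entry
 * carries the same value split across device_handle[], segment in bytes 0-1
 * and bus/devfn in bytes 2-3, which is why two u16 loads are stitched back
 * together before the comparison.
 */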

/* kfd_fill_gpu_direct_io_link_to_cpu - Fill in direct io link from GPU
 * to its NUMA node
 *
 * @kdev - [IN] GPU device
 * @proximity_domain - proximity domain of the GPU node
 *
 * Return 0 if successful else return -ve value
 */

	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
	if (kfd_dev_is_large_bar(kdev))
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	/* Fill in IOLINK subtype.
	 * TODO: Fill-in other fields of iolink subtype
	 */
	if (kdev->adev->gmc.xgmi.connected_to_cpu ||
	    (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
	     kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
	     AMDGPU_PKG_TYPE_APU)) {
		bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
		int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
							KFD_CRAT_INTRA_SOCKET_WEIGHT;
		uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
							kdev->adev, NULL, true) : mem_bw;

		/* with host gpu xgmi link, host can access gpu memory whether
		 * or not pcie bar type is large, so always create bidirectional
		 * io link.
		 */
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
		sub_type_hdr->weight_xgmi = weight;
		sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
		sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
	} else {
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
		sub_type_hdr->minimum_bandwidth_mbs =
			amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
		sub_type_hdr->maximum_bandwidth_mbs =
			amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
	}

	sub_type_hdr->proximity_domain_from = proximity_domain;

#ifdef CONFIG_ACPI_NUMA
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
	    num_possible_nodes() > 1)
		kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
		sub_type_hdr->proximity_domain_to = 0;
	else
		sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
#else
	sub_type_hdr->proximity_domain_to = 0;
#endif
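
/*
 * Note (hedged): the PCIe branch reports an asymmetric range by querying
 * amdgpu_amdkfd_get_pcie_bandwidth_mbytes() twice (min=true, then
 * min=false), while the XGMI/APU branch uses a single bandwidth value for
 * both bounds because the fabric link is symmetric.
 */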

	bool use_ta_info = kdev->kfd->num_nodes == 1;

	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
			       CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	/* Fill in IOLINK subtype */
	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
	sub_type_hdr->proximity_domain_from = proximity_domain_from;
	sub_type_hdr->proximity_domain_to = proximity_domain_to;

	if (use_ta_info) {
		sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
			amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
		sub_type_hdr->maximum_bandwidth_mbs =
			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
								peer_kdev->adev, false);
		sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
	} else {
		bool is_single_hop = kdev->kfd == peer_kdev->kfd;
		int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
			(2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
		int mem_bw = 819200;

		sub_type_hdr->weight_xgmi = weight;
		sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
		sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
	}
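
/*
 * Weight arithmetic example (illustration, using the constants assumed in
 * the reconstruction above, KFD_CRAT_XGMI_WEIGHT = 15 and
 * KFD_CRAT_INTRA_SOCKET_WEIGHT = 13): on the TA-info path two hops give
 * weight_xgmi = 15 * 2 = 30, and on the multi-node path a cross-socket link
 * costs 2 * 13 + 15 = 41, so the topology code will always prefer the
 * direct, lower-weight link.
 */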

/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
 *
 *	@pcrat_image: Fill in VCRAT for GPU
 *	@size:	[IN] allocated size of crat_image.
 *		[OUT] actual size of data filled in crat_image
 */
	struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
	struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;

	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
		return -EINVAL;

	/* Fill the CRAT Header.
	 * Modify length and total_entries as subunits are added.
	 */
	avail_size -= sizeof(struct crat_header);

	memcpy(&crat_table->signature, CRAT_SIGNATURE,
	       sizeof(crat_table->signature));
	/* Change the length as we add more subtypes */
	crat_table->length = sizeof(struct crat_header);
	crat_table->num_domains = 1;
	crat_table->total_entries = 0;

	/* Fill in Subtype: Compute Unit
	 * First fill in the sub type header and then sub type data
	 */
	avail_size -= sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill CU subtype data */
	cu = (struct crat_subtype_computeunit *)sub_type_hdr;
	cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
	cu->proximity_domain = proximity_domain;

	cu->num_simd_per_cu = cu_info->simd_per_cu;
	cu->num_simd_cores = cu_info->simd_per_cu *
			(cu_info->number / kdev->kfd->num_nodes);
	cu->max_waves_simd = cu_info->max_waves_per_simd;

	cu->wave_front_size = cu_info->wave_front_size;
	cu->array_count = gfx_info->max_sh_per_se *
			gfx_info->max_shader_engines;
	total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh);
	cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
	cu->num_cu_per_array = gfx_info->max_cu_per_sh;
	cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu;
	cu->num_banks = gfx_info->max_shader_engines;
	cu->lds_size_in_kb = cu_info->lds_size;

	cu->hsa_capability = 0;

	crat_table->length += sub_type_hdr->length;
	crat_table->total_entries++;

	/* Fill in Subtype: Memory. On large-BAR systems all VRAM is
	 * reported as public; otherwise the total FB is a private heap.
	 */
	local_mem_info = kdev->local_mem_info;
	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

	if (kdev->adev->debug_largebar)
		local_mem_info.local_mem_size_private = 0;

	crat_table->length += sizeof(struct crat_subtype_memory);
	crat_table->total_entries++;

	/* Fill in Subtype: IO_LINKS
	 * Direct I/O link from GPU to CPU
	 */
	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

	crat_table->length += sub_type_hdr->length;
	crat_table->total_entries++;

	/* P2P links from GPU to other GPUs in the same hive */
	if (kdev->kfd->hive_id) {
		for (nid = 0; nid < proximity_domain; ++nid) {
			peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
			if (!peer_dev->gpu)
				continue;
			if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
				continue;
			if (!amdgpu_xgmi_get_is_sharing_enabled(kdev->adev, peer_dev->gpu->adev))
				continue;
			ret = kfd_fill_gpu_xgmi_link_to_gpu(
				&avail_size, kdev, peer_dev->gpu,
				(struct crat_subtype_iolink *)sub_type_hdr,
				proximity_domain, nid);
			if (ret < 0)
				return ret;
			crat_table->length += sub_type_hdr->length;
			crat_table->total_entries++;
		}
	}

	*size = crat_table->length;

/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
 *	creates a Virtual CRAT (VCRAT) image
 *
 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
 *
 * @crat_image: VCRAT image created because ACPI does not have a
 *		CRAT for this device
 * @size: [OUT] size of virtual crat_image
 * @flags:	COMPUTE_UNIT_CPU - Create VCRAT for CPU device
 *		COMPUTE_UNIT_GPU - Create VCRAT for GPU
 *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
 *		-- this option is not currently implemented.
 *
 * Return 0 if successful else return -ve value
 */

	if (!crat_image)
		return -EINVAL;

	switch (flags) {
	case COMPUTE_UNIT_CPU:
		dyn_size = sizeof(struct crat_header) +
			num_nodes * (sizeof(struct crat_subtype_computeunit) +
			sizeof(struct crat_subtype_memory) +
			(num_nodes - 1) * sizeof(struct crat_subtype_iolink));
		pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
		if (!pcrat_image)
			return -ENOMEM;
		break;
	case COMPUTE_UNIT_GPU:
		if (!kdev)
			return -EINVAL;
		pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
		if (!pcrat_image)
			return -ENOMEM;
		break;
	case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
		/* TODO: */
		ret = -EINVAL;
		break;
	default:
		ret = -EINVAL;
	}

/* kfd_destroy_crat_image
 *
 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
 */