xref: /linux/drivers/gpu/drm/amd/amdkfd/kfd_topology.c (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/pci.h>
26 #include <linux/errno.h>
27 #include <linux/acpi.h>
28 #include <linux/hash.h>
29 #include <linux/cpufreq.h>
30 #include <linux/log2.h>
31 #include <linux/dmi.h>
32 #include <linux/atomic.h>
33 
34 #include "kfd_priv.h"
35 #include "kfd_crat.h"
36 #include "kfd_topology.h"
37 #include "kfd_device_queue_manager.h"
38 #include "kfd_iommu.h"
39 #include "amdgpu_amdkfd.h"
40 
41 /* topology_device_list - Master list of all topology devices */
42 static struct list_head topology_device_list;
43 static struct kfd_system_properties sys_props;
44 
45 static DECLARE_RWSEM(topology_lock);
46 static atomic_t topology_crat_proximity_domain;
47 
48 struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
49 						uint32_t proximity_domain)
50 {
51 	struct kfd_topology_device *top_dev;
52 	struct kfd_topology_device *device = NULL;
53 
54 	down_read(&topology_lock);
55 
56 	list_for_each_entry(top_dev, &topology_device_list, list)
57 		if (top_dev->proximity_domain == proximity_domain) {
58 			device = top_dev;
59 			break;
60 		}
61 
62 	up_read(&topology_lock);
63 
64 	return device;
65 }
66 
67 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
68 {
69 	struct kfd_topology_device *top_dev = NULL;
70 	struct kfd_topology_device *ret = NULL;
71 
72 	down_read(&topology_lock);
73 
74 	list_for_each_entry(top_dev, &topology_device_list, list)
75 		if (top_dev->gpu_id == gpu_id) {
76 			ret = top_dev;
77 			break;
78 		}
79 
80 	up_read(&topology_lock);
81 
82 	return ret;
83 }
84 
85 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
86 {
87 	struct kfd_topology_device *top_dev;
88 
89 	top_dev = kfd_topology_device_by_id(gpu_id);
90 	if (!top_dev)
91 		return NULL;
92 
93 	return top_dev->gpu;
94 }
95 
96 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
97 {
98 	struct kfd_topology_device *top_dev;
99 	struct kfd_dev *device = NULL;
100 
101 	down_read(&topology_lock);
102 
103 	list_for_each_entry(top_dev, &topology_device_list, list)
104 		if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
105 			device = top_dev->gpu;
106 			break;
107 		}
108 
109 	up_read(&topology_lock);
110 
111 	return device;
112 }
113 
114 struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
115 {
116 	struct kfd_topology_device *top_dev;
117 	struct kfd_dev *device = NULL;
118 
119 	down_read(&topology_lock);
120 
121 	list_for_each_entry(top_dev, &topology_device_list, list)
122 		if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
123 			device = top_dev->gpu;
124 			break;
125 		}
126 
127 	up_read(&topology_lock);
128 
129 	return device;
130 }
131 
132 /* Called with write topology_lock acquired */
133 static void kfd_release_topology_device(struct kfd_topology_device *dev)
134 {
135 	struct kfd_mem_properties *mem;
136 	struct kfd_cache_properties *cache;
137 	struct kfd_iolink_properties *iolink;
138 	struct kfd_perf_properties *perf;
139 
140 	list_del(&dev->list);
141 
142 	while (dev->mem_props.next != &dev->mem_props) {
143 		mem = container_of(dev->mem_props.next,
144 				struct kfd_mem_properties, list);
145 		list_del(&mem->list);
146 		kfree(mem);
147 	}
148 
149 	while (dev->cache_props.next != &dev->cache_props) {
150 		cache = container_of(dev->cache_props.next,
151 				struct kfd_cache_properties, list);
152 		list_del(&cache->list);
153 		kfree(cache);
154 	}
155 
156 	while (dev->io_link_props.next != &dev->io_link_props) {
157 		iolink = container_of(dev->io_link_props.next,
158 				struct kfd_iolink_properties, list);
159 		list_del(&iolink->list);
160 		kfree(iolink);
161 	}
162 
163 	while (dev->perf_props.next != &dev->perf_props) {
164 		perf = container_of(dev->perf_props.next,
165 				struct kfd_perf_properties, list);
166 		list_del(&perf->list);
167 		kfree(perf);
168 	}
169 
170 	kfree(dev);
171 }
172 
173 void kfd_release_topology_device_list(struct list_head *device_list)
174 {
175 	struct kfd_topology_device *dev;
176 
177 	while (!list_empty(device_list)) {
178 		dev = list_first_entry(device_list,
179 				       struct kfd_topology_device, list);
180 		kfd_release_topology_device(dev);
181 	}
182 }
183 
184 static void kfd_release_live_view(void)
185 {
186 	kfd_release_topology_device_list(&topology_device_list);
187 	memset(&sys_props, 0, sizeof(sys_props));
188 }
189 
190 struct kfd_topology_device *kfd_create_topology_device(
191 				struct list_head *device_list)
192 {
193 	struct kfd_topology_device *dev;
194 
195 	dev = kfd_alloc_struct(dev);
196 	if (!dev) {
197 		pr_err("No memory to allocate a topology device");
198 		return NULL;
199 	}
200 
201 	INIT_LIST_HEAD(&dev->mem_props);
202 	INIT_LIST_HEAD(&dev->cache_props);
203 	INIT_LIST_HEAD(&dev->io_link_props);
204 	INIT_LIST_HEAD(&dev->perf_props);
205 
206 	list_add_tail(&dev->list, device_list);
207 
208 	return dev;
209 }
210 
211 
/*
 * sysfs_show_gen_prop - append formatted text to a sysfs show buffer
 * @buffer: NUL-terminated buffer of PAGE_SIZE bytes; evaluated more than once
 * @fmt: printf-style format string literal
 *
 * The new text is written at the current end of @buffer. The buffer is
 * never passed to snprintf() as both destination and "%s" source, because
 * formatting between overlapping objects is undefined.
 *
 * Evaluates to the length of the old contents plus the snprintf() result
 * for the appended text, i.e. the total length the string would have
 * without truncation.
 *
 * The sysfs_show_* wrappers below fix the format for one "name value" line
 * (32-bit or 64-bit unsigned) or one bare value line.
 */
#define sysfs_show_gen_prop(buffer, fmt, ...)				\
({									\
	size_t __len = strlen(buffer);					\
	(int)__len + snprintf((buffer) + __len, PAGE_SIZE - __len,	\
			      fmt, __VA_ARGS__);			\
})
#define sysfs_show_32bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
#define sysfs_show_64bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
#define sysfs_show_32bit_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%u\n", value)
#define sysfs_show_str_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%s\n", value)
222 
223 static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
224 		char *buffer)
225 {
226 	ssize_t ret;
227 
228 	/* Making sure that the buffer is an empty string */
229 	buffer[0] = 0;
230 
231 	if (attr == &sys_props.attr_genid) {
232 		ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
233 	} else if (attr == &sys_props.attr_props) {
234 		sysfs_show_64bit_prop(buffer, "platform_oem",
235 				sys_props.platform_oem);
236 		sysfs_show_64bit_prop(buffer, "platform_id",
237 				sys_props.platform_id);
238 		ret = sysfs_show_64bit_prop(buffer, "platform_rev",
239 				sys_props.platform_rev);
240 	} else {
241 		ret = -EINVAL;
242 	}
243 
244 	return ret;
245 }
246 
/*
 * kfd_topology_kobj_release - release callback of the kobj_types in this file
 * @kobj: kobject being released
 *
 * Frees the kobject allocation itself.
 */
static void kfd_topology_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}
251 
252 static const struct sysfs_ops sysprops_ops = {
253 	.show = sysprops_show,
254 };
255 
256 static struct kobj_type sysprops_type = {
257 	.release = kfd_topology_kobj_release,
258 	.sysfs_ops = &sysprops_ops,
259 };
260 
261 static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
262 		char *buffer)
263 {
264 	ssize_t ret;
265 	struct kfd_iolink_properties *iolink;
266 
267 	/* Making sure that the buffer is an empty string */
268 	buffer[0] = 0;
269 
270 	iolink = container_of(attr, struct kfd_iolink_properties, attr);
271 	sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
272 	sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
273 	sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
274 	sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
275 	sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
276 	sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
277 	sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
278 	sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
279 	sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
280 	sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
281 	sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
282 			iolink->rec_transfer_size);
283 	ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);
284 
285 	return ret;
286 }
287 
288 static const struct sysfs_ops iolink_ops = {
289 	.show = iolink_show,
290 };
291 
292 static struct kobj_type iolink_type = {
293 	.release = kfd_topology_kobj_release,
294 	.sysfs_ops = &iolink_ops,
295 };
296 
297 static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
298 		char *buffer)
299 {
300 	ssize_t ret;
301 	struct kfd_mem_properties *mem;
302 
303 	/* Making sure that the buffer is an empty string */
304 	buffer[0] = 0;
305 
306 	mem = container_of(attr, struct kfd_mem_properties, attr);
307 	sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
308 	sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
309 	sysfs_show_32bit_prop(buffer, "flags", mem->flags);
310 	sysfs_show_32bit_prop(buffer, "width", mem->width);
311 	ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);
312 
313 	return ret;
314 }
315 
316 static const struct sysfs_ops mem_ops = {
317 	.show = mem_show,
318 };
319 
320 static struct kobj_type mem_type = {
321 	.release = kfd_topology_kobj_release,
322 	.sysfs_ops = &mem_ops,
323 };
324 
325 static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
326 		char *buffer)
327 {
328 	ssize_t ret;
329 	uint32_t i, j;
330 	struct kfd_cache_properties *cache;
331 
332 	/* Making sure that the buffer is an empty string */
333 	buffer[0] = 0;
334 
335 	cache = container_of(attr, struct kfd_cache_properties, attr);
336 	sysfs_show_32bit_prop(buffer, "processor_id_low",
337 			cache->processor_id_low);
338 	sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
339 	sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
340 	sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
341 	sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
342 			cache->cachelines_per_tag);
343 	sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
344 	sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
345 	sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
346 	snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
347 	for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
348 		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
349 			/* Check each bit */
350 			if (cache->sibling_map[i] & (1 << j))
351 				ret = snprintf(buffer, PAGE_SIZE,
352 					 "%s%d%s", buffer, 1, ",");
353 			else
354 				ret = snprintf(buffer, PAGE_SIZE,
355 					 "%s%d%s", buffer, 0, ",");
356 		}
357 	/* Replace the last "," with end of line */
358 	*(buffer + strlen(buffer) - 1) = 0xA;
359 	return ret;
360 }
361 
362 static const struct sysfs_ops cache_ops = {
363 	.show = kfd_cache_show,
364 };
365 
366 static struct kobj_type cache_type = {
367 	.release = kfd_topology_kobj_release,
368 	.sysfs_ops = &cache_ops,
369 };
370 
371 /****** Sysfs of Performance Counters ******/
372 
373 struct kfd_perf_attr {
374 	struct kobj_attribute attr;
375 	uint32_t data;
376 };
377 
378 static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
379 			char *buf)
380 {
381 	struct kfd_perf_attr *attr;
382 
383 	buf[0] = 0;
384 	attr = container_of(attrs, struct kfd_perf_attr, attr);
385 	if (!attr->data) /* invalid data for PMC */
386 		return 0;
387 	else
388 		return sysfs_show_32bit_val(buf, attr->data);
389 }
390 
391 #define KFD_PERF_DESC(_name, _data)			\
392 {							\
393 	.attr  = __ATTR(_name, 0444, perf_show, NULL),	\
394 	.data = _data,					\
395 }
396 
397 static struct kfd_perf_attr perf_attr_iommu[] = {
398 	KFD_PERF_DESC(max_concurrent, 0),
399 	KFD_PERF_DESC(num_counters, 0),
400 	KFD_PERF_DESC(counter_ids, 0),
401 };
402 /****************************************/
403 
404 static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
405 		char *buffer)
406 {
407 	struct kfd_topology_device *dev;
408 	char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
409 	uint32_t i;
410 	uint32_t log_max_watch_addr;
411 
412 	/* Making sure that the buffer is an empty string */
413 	buffer[0] = 0;
414 
415 	if (strcmp(attr->name, "gpu_id") == 0) {
416 		dev = container_of(attr, struct kfd_topology_device,
417 				attr_gpuid);
418 		return sysfs_show_32bit_val(buffer, dev->gpu_id);
419 	}
420 
421 	if (strcmp(attr->name, "name") == 0) {
422 		dev = container_of(attr, struct kfd_topology_device,
423 				attr_name);
424 		for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
425 			public_name[i] =
426 					(char)dev->node_props.marketing_name[i];
427 			if (dev->node_props.marketing_name[i] == 0)
428 				break;
429 		}
430 		public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
431 		return sysfs_show_str_val(buffer, public_name);
432 	}
433 
434 	dev = container_of(attr, struct kfd_topology_device,
435 			attr_props);
436 	sysfs_show_32bit_prop(buffer, "cpu_cores_count",
437 			dev->node_props.cpu_cores_count);
438 	sysfs_show_32bit_prop(buffer, "simd_count",
439 			dev->node_props.simd_count);
440 	sysfs_show_32bit_prop(buffer, "mem_banks_count",
441 			dev->node_props.mem_banks_count);
442 	sysfs_show_32bit_prop(buffer, "caches_count",
443 			dev->node_props.caches_count);
444 	sysfs_show_32bit_prop(buffer, "io_links_count",
445 			dev->node_props.io_links_count);
446 	sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
447 			dev->node_props.cpu_core_id_base);
448 	sysfs_show_32bit_prop(buffer, "simd_id_base",
449 			dev->node_props.simd_id_base);
450 	sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
451 			dev->node_props.max_waves_per_simd);
452 	sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
453 			dev->node_props.lds_size_in_kb);
454 	sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
455 			dev->node_props.gds_size_in_kb);
456 	sysfs_show_32bit_prop(buffer, "wave_front_size",
457 			dev->node_props.wave_front_size);
458 	sysfs_show_32bit_prop(buffer, "array_count",
459 			dev->node_props.array_count);
460 	sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
461 			dev->node_props.simd_arrays_per_engine);
462 	sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
463 			dev->node_props.cu_per_simd_array);
464 	sysfs_show_32bit_prop(buffer, "simd_per_cu",
465 			dev->node_props.simd_per_cu);
466 	sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
467 			dev->node_props.max_slots_scratch_cu);
468 	sysfs_show_32bit_prop(buffer, "vendor_id",
469 			dev->node_props.vendor_id);
470 	sysfs_show_32bit_prop(buffer, "device_id",
471 			dev->node_props.device_id);
472 	sysfs_show_32bit_prop(buffer, "location_id",
473 			dev->node_props.location_id);
474 	sysfs_show_32bit_prop(buffer, "drm_render_minor",
475 			dev->node_props.drm_render_minor);
476 	sysfs_show_64bit_prop(buffer, "hive_id",
477 			dev->node_props.hive_id);
478 
479 	if (dev->gpu) {
480 		log_max_watch_addr =
481 			__ilog2_u32(dev->gpu->device_info->num_of_watch_points);
482 
483 		if (log_max_watch_addr) {
484 			dev->node_props.capability |=
485 					HSA_CAP_WATCH_POINTS_SUPPORTED;
486 
487 			dev->node_props.capability |=
488 				((log_max_watch_addr <<
489 					HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
490 				HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
491 		}
492 
493 		if (dev->gpu->device_info->asic_family == CHIP_TONGA)
494 			dev->node_props.capability |=
495 					HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
496 
497 		sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
498 			dev->node_props.max_engine_clk_fcompute);
499 
500 		sysfs_show_64bit_prop(buffer, "local_mem_size",
501 				(unsigned long long int) 0);
502 
503 		sysfs_show_32bit_prop(buffer, "fw_version",
504 				dev->gpu->mec_fw_version);
505 		sysfs_show_32bit_prop(buffer, "capability",
506 				dev->node_props.capability);
507 		sysfs_show_32bit_prop(buffer, "sdma_fw_version",
508 				dev->gpu->sdma_fw_version);
509 	}
510 
511 	return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
512 					cpufreq_quick_get_max(0)/1000);
513 }
514 
515 static const struct sysfs_ops node_ops = {
516 	.show = node_show,
517 };
518 
519 static struct kobj_type node_type = {
520 	.release = kfd_topology_kobj_release,
521 	.sysfs_ops = &node_ops,
522 };
523 
/*
 * kfd_remove_sysfs_file - remove an attribute file and drop its kobject
 * @kobj: kobject owning the file
 * @attr: attribute to remove from @kobj
 *
 * After removing the file, @kobj is deleted from sysfs and one reference
 * on it is dropped.
 */
static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
	sysfs_remove_file(kobj, attr);

	kobject_del(kobj);
	kobject_put(kobj);
}
530 
531 static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
532 {
533 	struct kfd_iolink_properties *iolink;
534 	struct kfd_cache_properties *cache;
535 	struct kfd_mem_properties *mem;
536 	struct kfd_perf_properties *perf;
537 
538 	if (dev->kobj_iolink) {
539 		list_for_each_entry(iolink, &dev->io_link_props, list)
540 			if (iolink->kobj) {
541 				kfd_remove_sysfs_file(iolink->kobj,
542 							&iolink->attr);
543 				iolink->kobj = NULL;
544 			}
545 		kobject_del(dev->kobj_iolink);
546 		kobject_put(dev->kobj_iolink);
547 		dev->kobj_iolink = NULL;
548 	}
549 
550 	if (dev->kobj_cache) {
551 		list_for_each_entry(cache, &dev->cache_props, list)
552 			if (cache->kobj) {
553 				kfd_remove_sysfs_file(cache->kobj,
554 							&cache->attr);
555 				cache->kobj = NULL;
556 			}
557 		kobject_del(dev->kobj_cache);
558 		kobject_put(dev->kobj_cache);
559 		dev->kobj_cache = NULL;
560 	}
561 
562 	if (dev->kobj_mem) {
563 		list_for_each_entry(mem, &dev->mem_props, list)
564 			if (mem->kobj) {
565 				kfd_remove_sysfs_file(mem->kobj, &mem->attr);
566 				mem->kobj = NULL;
567 			}
568 		kobject_del(dev->kobj_mem);
569 		kobject_put(dev->kobj_mem);
570 		dev->kobj_mem = NULL;
571 	}
572 
573 	if (dev->kobj_perf) {
574 		list_for_each_entry(perf, &dev->perf_props, list) {
575 			kfree(perf->attr_group);
576 			perf->attr_group = NULL;
577 		}
578 		kobject_del(dev->kobj_perf);
579 		kobject_put(dev->kobj_perf);
580 		dev->kobj_perf = NULL;
581 	}
582 
583 	if (dev->kobj_node) {
584 		sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
585 		sysfs_remove_file(dev->kobj_node, &dev->attr_name);
586 		sysfs_remove_file(dev->kobj_node, &dev->attr_props);
587 		kobject_del(dev->kobj_node);
588 		kobject_put(dev->kobj_node);
589 		dev->kobj_node = NULL;
590 	}
591 }
592 
593 static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
594 		uint32_t id)
595 {
596 	struct kfd_iolink_properties *iolink;
597 	struct kfd_cache_properties *cache;
598 	struct kfd_mem_properties *mem;
599 	struct kfd_perf_properties *perf;
600 	int ret;
601 	uint32_t i, num_attrs;
602 	struct attribute **attrs;
603 
604 	if (WARN_ON(dev->kobj_node))
605 		return -EEXIST;
606 
607 	/*
608 	 * Creating the sysfs folders
609 	 */
610 	dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
611 	if (!dev->kobj_node)
612 		return -ENOMEM;
613 
614 	ret = kobject_init_and_add(dev->kobj_node, &node_type,
615 			sys_props.kobj_nodes, "%d", id);
616 	if (ret < 0)
617 		return ret;
618 
619 	dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
620 	if (!dev->kobj_mem)
621 		return -ENOMEM;
622 
623 	dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
624 	if (!dev->kobj_cache)
625 		return -ENOMEM;
626 
627 	dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
628 	if (!dev->kobj_iolink)
629 		return -ENOMEM;
630 
631 	dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
632 	if (!dev->kobj_perf)
633 		return -ENOMEM;
634 
635 	/*
636 	 * Creating sysfs files for node properties
637 	 */
638 	dev->attr_gpuid.name = "gpu_id";
639 	dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE;
640 	sysfs_attr_init(&dev->attr_gpuid);
641 	dev->attr_name.name = "name";
642 	dev->attr_name.mode = KFD_SYSFS_FILE_MODE;
643 	sysfs_attr_init(&dev->attr_name);
644 	dev->attr_props.name = "properties";
645 	dev->attr_props.mode = KFD_SYSFS_FILE_MODE;
646 	sysfs_attr_init(&dev->attr_props);
647 	ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid);
648 	if (ret < 0)
649 		return ret;
650 	ret = sysfs_create_file(dev->kobj_node, &dev->attr_name);
651 	if (ret < 0)
652 		return ret;
653 	ret = sysfs_create_file(dev->kobj_node, &dev->attr_props);
654 	if (ret < 0)
655 		return ret;
656 
657 	i = 0;
658 	list_for_each_entry(mem, &dev->mem_props, list) {
659 		mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
660 		if (!mem->kobj)
661 			return -ENOMEM;
662 		ret = kobject_init_and_add(mem->kobj, &mem_type,
663 				dev->kobj_mem, "%d", i);
664 		if (ret < 0)
665 			return ret;
666 
667 		mem->attr.name = "properties";
668 		mem->attr.mode = KFD_SYSFS_FILE_MODE;
669 		sysfs_attr_init(&mem->attr);
670 		ret = sysfs_create_file(mem->kobj, &mem->attr);
671 		if (ret < 0)
672 			return ret;
673 		i++;
674 	}
675 
676 	i = 0;
677 	list_for_each_entry(cache, &dev->cache_props, list) {
678 		cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
679 		if (!cache->kobj)
680 			return -ENOMEM;
681 		ret = kobject_init_and_add(cache->kobj, &cache_type,
682 				dev->kobj_cache, "%d", i);
683 		if (ret < 0)
684 			return ret;
685 
686 		cache->attr.name = "properties";
687 		cache->attr.mode = KFD_SYSFS_FILE_MODE;
688 		sysfs_attr_init(&cache->attr);
689 		ret = sysfs_create_file(cache->kobj, &cache->attr);
690 		if (ret < 0)
691 			return ret;
692 		i++;
693 	}
694 
695 	i = 0;
696 	list_for_each_entry(iolink, &dev->io_link_props, list) {
697 		iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
698 		if (!iolink->kobj)
699 			return -ENOMEM;
700 		ret = kobject_init_and_add(iolink->kobj, &iolink_type,
701 				dev->kobj_iolink, "%d", i);
702 		if (ret < 0)
703 			return ret;
704 
705 		iolink->attr.name = "properties";
706 		iolink->attr.mode = KFD_SYSFS_FILE_MODE;
707 		sysfs_attr_init(&iolink->attr);
708 		ret = sysfs_create_file(iolink->kobj, &iolink->attr);
709 		if (ret < 0)
710 			return ret;
711 		i++;
712 	}
713 
714 	/* All hardware blocks have the same number of attributes. */
715 	num_attrs = ARRAY_SIZE(perf_attr_iommu);
716 	list_for_each_entry(perf, &dev->perf_props, list) {
717 		perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
718 			* num_attrs + sizeof(struct attribute_group),
719 			GFP_KERNEL);
720 		if (!perf->attr_group)
721 			return -ENOMEM;
722 
723 		attrs = (struct attribute **)(perf->attr_group + 1);
724 		if (!strcmp(perf->block_name, "iommu")) {
725 		/* Information of IOMMU's num_counters and counter_ids is shown
726 		 * under /sys/bus/event_source/devices/amd_iommu. We don't
727 		 * duplicate here.
728 		 */
729 			perf_attr_iommu[0].data = perf->max_concurrent;
730 			for (i = 0; i < num_attrs; i++)
731 				attrs[i] = &perf_attr_iommu[i].attr.attr;
732 		}
733 		perf->attr_group->name = perf->block_name;
734 		perf->attr_group->attrs = attrs;
735 		ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
736 		if (ret < 0)
737 			return ret;
738 	}
739 
740 	return 0;
741 }
742 
743 /* Called with write topology lock acquired */
744 static int kfd_build_sysfs_node_tree(void)
745 {
746 	struct kfd_topology_device *dev;
747 	int ret;
748 	uint32_t i = 0;
749 
750 	list_for_each_entry(dev, &topology_device_list, list) {
751 		ret = kfd_build_sysfs_node_entry(dev, i);
752 		if (ret < 0)
753 			return ret;
754 		i++;
755 	}
756 
757 	return 0;
758 }
759 
760 /* Called with write topology lock acquired */
761 static void kfd_remove_sysfs_node_tree(void)
762 {
763 	struct kfd_topology_device *dev;
764 
765 	list_for_each_entry(dev, &topology_device_list, list)
766 		kfd_remove_sysfs_node_entry(dev);
767 }
768 
769 static int kfd_topology_update_sysfs(void)
770 {
771 	int ret;
772 
773 	pr_info("Creating topology SYSFS entries\n");
774 	if (!sys_props.kobj_topology) {
775 		sys_props.kobj_topology =
776 				kfd_alloc_struct(sys_props.kobj_topology);
777 		if (!sys_props.kobj_topology)
778 			return -ENOMEM;
779 
780 		ret = kobject_init_and_add(sys_props.kobj_topology,
781 				&sysprops_type,  &kfd_device->kobj,
782 				"topology");
783 		if (ret < 0)
784 			return ret;
785 
786 		sys_props.kobj_nodes = kobject_create_and_add("nodes",
787 				sys_props.kobj_topology);
788 		if (!sys_props.kobj_nodes)
789 			return -ENOMEM;
790 
791 		sys_props.attr_genid.name = "generation_id";
792 		sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE;
793 		sysfs_attr_init(&sys_props.attr_genid);
794 		ret = sysfs_create_file(sys_props.kobj_topology,
795 				&sys_props.attr_genid);
796 		if (ret < 0)
797 			return ret;
798 
799 		sys_props.attr_props.name = "system_properties";
800 		sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE;
801 		sysfs_attr_init(&sys_props.attr_props);
802 		ret = sysfs_create_file(sys_props.kobj_topology,
803 				&sys_props.attr_props);
804 		if (ret < 0)
805 			return ret;
806 	}
807 
808 	kfd_remove_sysfs_node_tree();
809 
810 	return kfd_build_sysfs_node_tree();
811 }
812 
813 static void kfd_topology_release_sysfs(void)
814 {
815 	kfd_remove_sysfs_node_tree();
816 	if (sys_props.kobj_topology) {
817 		sysfs_remove_file(sys_props.kobj_topology,
818 				&sys_props.attr_genid);
819 		sysfs_remove_file(sys_props.kobj_topology,
820 				&sys_props.attr_props);
821 		if (sys_props.kobj_nodes) {
822 			kobject_del(sys_props.kobj_nodes);
823 			kobject_put(sys_props.kobj_nodes);
824 			sys_props.kobj_nodes = NULL;
825 		}
826 		kobject_del(sys_props.kobj_topology);
827 		kobject_put(sys_props.kobj_topology);
828 		sys_props.kobj_topology = NULL;
829 	}
830 }
831 
832 /* Called with write topology_lock acquired */
833 static void kfd_topology_update_device_list(struct list_head *temp_list,
834 					struct list_head *master_list)
835 {
836 	while (!list_empty(temp_list)) {
837 		list_move_tail(temp_list->next, master_list);
838 		sys_props.num_devices++;
839 	}
840 }
841 
842 static void kfd_debug_print_topology(void)
843 {
844 	struct kfd_topology_device *dev;
845 
846 	down_read(&topology_lock);
847 
848 	dev = list_last_entry(&topology_device_list,
849 			struct kfd_topology_device, list);
850 	if (dev) {
851 		if (dev->node_props.cpu_cores_count &&
852 				dev->node_props.simd_count) {
853 			pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
854 				dev->node_props.device_id,
855 				dev->node_props.vendor_id);
856 		} else if (dev->node_props.cpu_cores_count)
857 			pr_info("Topology: Add CPU node\n");
858 		else if (dev->node_props.simd_count)
859 			pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
860 				dev->node_props.device_id,
861 				dev->node_props.vendor_id);
862 	}
863 	up_read(&topology_lock);
864 }
865 
866 /* Helper function for intializing platform_xx members of
867  * kfd_system_properties. Uses OEM info from the last CPU/APU node.
868  */
869 static void kfd_update_system_properties(void)
870 {
871 	struct kfd_topology_device *dev;
872 
873 	down_read(&topology_lock);
874 	dev = list_last_entry(&topology_device_list,
875 			struct kfd_topology_device, list);
876 	if (dev) {
877 		sys_props.platform_id =
878 			(*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
879 		sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
880 		sys_props.platform_rev = dev->oem_revision;
881 	}
882 	up_read(&topology_lock);
883 }
884 
885 static void find_system_memory(const struct dmi_header *dm,
886 	void *private)
887 {
888 	struct kfd_mem_properties *mem;
889 	u16 mem_width, mem_clock;
890 	struct kfd_topology_device *kdev =
891 		(struct kfd_topology_device *)private;
892 	const u8 *dmi_data = (const u8 *)(dm + 1);
893 
894 	if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
895 		mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
896 		mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
897 		list_for_each_entry(mem, &kdev->mem_props, list) {
898 			if (mem_width != 0xFFFF && mem_width != 0)
899 				mem->width = mem_width;
900 			if (mem_clock != 0)
901 				mem->mem_clk_max = mem_clock;
902 		}
903 	}
904 }
905 
/*
 * kfd_add_perf_to_topology - add performance counter information to a
 *	topology device
 * @kdev: topology device that receives the information
 *
 * Performance counters are not described by CRAT, but they are exposed in
 * sysfs under the topology directory for the Thunk. This has to run
 * before the sysfs tree is updated. Only the IOMMU counters are supported
 * so far.
 *
 * Return: the result of kfd_iommu_add_perf_counters().
 */
static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
{
	int ret = kfd_iommu_add_perf_counters(kdev);

	return ret;
}
916 
917 /* kfd_add_non_crat_information - Add information that is not currently
918  *	defined in CRAT but is necessary for KFD topology
919  * @dev - topology device to which addition info is added
920  */
921 static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
922 {
923 	/* Check if CPU only node. */
924 	if (!kdev->gpu) {
925 		/* Add system memory information */
926 		dmi_walk(find_system_memory, kdev);
927 	}
928 	/* TODO: For GPU node, rearrange code from kfd_topology_add_device */
929 }
930 
931 /* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
932  *	Ignore CRAT for all other devices. AMD APU is identified if both CPU
933  *	and GPU cores are present.
934  * @device_list - topology device list created by parsing ACPI CRAT table.
935  * @return - TRUE if invalid, FALSE is valid.
936  */
937 static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
938 {
939 	struct kfd_topology_device *dev;
940 
941 	list_for_each_entry(dev, device_list, list) {
942 		if (dev->node_props.cpu_cores_count &&
943 			dev->node_props.simd_count)
944 			return false;
945 	}
946 	pr_info("Ignoring ACPI CRAT on non-APU system\n");
947 	return true;
948 }
949 
950 int kfd_topology_init(void)
951 {
952 	void *crat_image = NULL;
953 	size_t image_size = 0;
954 	int ret;
955 	struct list_head temp_topology_device_list;
956 	int cpu_only_node = 0;
957 	struct kfd_topology_device *kdev;
958 	int proximity_domain;
959 
960 	/* topology_device_list - Master list of all topology devices
961 	 * temp_topology_device_list - temporary list created while parsing CRAT
962 	 * or VCRAT. Once parsing is complete the contents of list is moved to
963 	 * topology_device_list
964 	 */
965 
966 	/* Initialize the head for the both the lists */
967 	INIT_LIST_HEAD(&topology_device_list);
968 	INIT_LIST_HEAD(&temp_topology_device_list);
969 	init_rwsem(&topology_lock);
970 
971 	memset(&sys_props, 0, sizeof(sys_props));
972 
973 	/* Proximity domains in ACPI CRAT tables start counting at
974 	 * 0. The same should be true for virtual CRAT tables created
975 	 * at this stage. GPUs added later in kfd_topology_add_device
976 	 * use a counter.
977 	 */
978 	proximity_domain = 0;
979 
980 	/*
981 	 * Get the CRAT image from the ACPI. If ACPI doesn't have one
982 	 * or if ACPI CRAT is invalid create a virtual CRAT.
983 	 * NOTE: The current implementation expects all AMD APUs to have
984 	 *	CRAT. If no CRAT is available, it is assumed to be a CPU
985 	 */
986 	ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
987 	if (!ret) {
988 		ret = kfd_parse_crat_table(crat_image,
989 					   &temp_topology_device_list,
990 					   proximity_domain);
991 		if (ret ||
992 		    kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
993 			kfd_release_topology_device_list(
994 				&temp_topology_device_list);
995 			kfd_destroy_crat_image(crat_image);
996 			crat_image = NULL;
997 		}
998 	}
999 
1000 	if (!crat_image) {
1001 		ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
1002 						    COMPUTE_UNIT_CPU, NULL,
1003 						    proximity_domain);
1004 		cpu_only_node = 1;
1005 		if (ret) {
1006 			pr_err("Error creating VCRAT table for CPU\n");
1007 			return ret;
1008 		}
1009 
1010 		ret = kfd_parse_crat_table(crat_image,
1011 					   &temp_topology_device_list,
1012 					   proximity_domain);
1013 		if (ret) {
1014 			pr_err("Error parsing VCRAT table for CPU\n");
1015 			goto err;
1016 		}
1017 	}
1018 
1019 	kdev = list_first_entry(&temp_topology_device_list,
1020 				struct kfd_topology_device, list);
1021 	kfd_add_perf_to_topology(kdev);
1022 
1023 	down_write(&topology_lock);
1024 	kfd_topology_update_device_list(&temp_topology_device_list,
1025 					&topology_device_list);
1026 	atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
1027 	ret = kfd_topology_update_sysfs();
1028 	up_write(&topology_lock);
1029 
1030 	if (!ret) {
1031 		sys_props.generation_count++;
1032 		kfd_update_system_properties();
1033 		kfd_debug_print_topology();
1034 		pr_info("Finished initializing topology\n");
1035 	} else
1036 		pr_err("Failed to update topology in sysfs ret=%d\n", ret);
1037 
1038 	/* For nodes with GPU, this information gets added
1039 	 * when GPU is detected (kfd_topology_add_device).
1040 	 */
1041 	if (cpu_only_node) {
1042 		/* Add additional information to CPU only node created above */
1043 		down_write(&topology_lock);
1044 		kdev = list_first_entry(&topology_device_list,
1045 				struct kfd_topology_device, list);
1046 		up_write(&topology_lock);
1047 		kfd_add_non_crat_information(kdev);
1048 	}
1049 
1050 err:
1051 	kfd_destroy_crat_image(crat_image);
1052 	return ret;
1053 }
1054 
/* kfd_topology_shutdown - Tear down the KFD topology.
 *
 *	With topology_lock held for writing, calls
 *	kfd_topology_release_sysfs() followed by kfd_release_live_view().
 *	NOTE(review): presumably these remove the topology sysfs entries and
 *	free the devices on the master list - confirm against the helpers.
 */
void kfd_topology_shutdown(void)
{
	down_write(&topology_lock);
	kfd_topology_release_sysfs();
	kfd_release_live_view();
	up_write(&topology_lock);
}
1062 
1063 static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
1064 {
1065 	uint32_t hashout;
1066 	uint32_t buf[7];
1067 	uint64_t local_mem_size;
1068 	int i;
1069 	struct kfd_local_mem_info local_mem_info;
1070 
1071 	if (!gpu)
1072 		return 0;
1073 
1074 	amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
1075 
1076 	local_mem_size = local_mem_info.local_mem_size_private +
1077 			local_mem_info.local_mem_size_public;
1078 
1079 	buf[0] = gpu->pdev->devfn;
1080 	buf[1] = gpu->pdev->subsystem_vendor;
1081 	buf[2] = gpu->pdev->subsystem_device;
1082 	buf[3] = gpu->pdev->device;
1083 	buf[4] = gpu->pdev->bus->number;
1084 	buf[5] = lower_32_bits(local_mem_size);
1085 	buf[6] = upper_32_bits(local_mem_size);
1086 
1087 	for (i = 0, hashout = 0; i < 7; i++)
1088 		hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
1089 
1090 	return hashout;
1091 }
1092 /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1093  *		the GPU device is not already present in the topology device
1094  *		list then return NULL. This means a new topology device has to
1095  *		be created for this GPU.
1096  */
1097 static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
1098 {
1099 	struct kfd_topology_device *dev;
1100 	struct kfd_topology_device *out_dev = NULL;
1101 
1102 	down_write(&topology_lock);
1103 	list_for_each_entry(dev, &topology_device_list, list) {
1104 		/* Discrete GPUs need their own topology device list
1105 		 * entries. Don't assign them to CPU/APU nodes.
1106 		 */
1107 		if (!gpu->device_info->needs_iommu_device &&
1108 		    dev->node_props.cpu_cores_count)
1109 			continue;
1110 
1111 		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
1112 			dev->gpu = gpu;
1113 			out_dev = dev;
1114 			break;
1115 		}
1116 	}
1117 	up_write(&topology_lock);
1118 	return out_dev;
1119 }
1120 
/* kfd_notify_gpu_change - Placeholder for notifying about a GPU change.
 *	@gpu_id: ID of the GPU that changed
 *	@arrival: callers in this file pass 1 after adding a GPU and 0 after
 *		removing one
 *
 *	Currently does nothing; see the TODO below.
 */
static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
{
	/*
	 * TODO: Generate an event for thunk about the arrival/removal
	 * of the GPU
	 */
}
1128 
1129 /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1130  *		patch this after CRAT parsing.
1131  */
1132 static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
1133 {
1134 	struct kfd_mem_properties *mem;
1135 	struct kfd_local_mem_info local_mem_info;
1136 
1137 	if (!dev)
1138 		return;
1139 
1140 	/* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
1141 	 * single bank of VRAM local memory.
1142 	 * for dGPUs - VCRAT reports only one bank of Local Memory
1143 	 * for APUs - If CRAT from ACPI reports more than one bank, then
1144 	 *	all the banks will report the same mem_clk_max information
1145 	 */
1146 	amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
1147 
1148 	list_for_each_entry(mem, &dev->mem_props, list)
1149 		mem->mem_clk_max = local_mem_info.mem_clk_max;
1150 }
1151 
1152 static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
1153 {
1154 	struct kfd_iolink_properties *link, *cpu_link;
1155 	struct kfd_topology_device *cpu_dev;
1156 	uint32_t cap;
1157 	uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
1158 	uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
1159 
1160 	if (!dev || !dev->gpu)
1161 		return;
1162 
1163 	pcie_capability_read_dword(dev->gpu->pdev,
1164 			PCI_EXP_DEVCAP2, &cap);
1165 
1166 	if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
1167 		     PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
1168 		cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1169 			CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1170 
1171 	if (!dev->gpu->pci_atomic_requested ||
1172 	    dev->gpu->device_info->asic_family == CHIP_HAWAII)
1173 		flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1174 			CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1175 
1176 	/* GPU only creates direct links so apply flags setting to all */
1177 	list_for_each_entry(link, &dev->io_link_props, list) {
1178 		link->flags = flag;
1179 		cpu_dev = kfd_topology_device_by_proximity_domain(
1180 				link->node_to);
1181 		if (cpu_dev) {
1182 			list_for_each_entry(cpu_link,
1183 					    &cpu_dev->io_link_props, list)
1184 				if (cpu_link->node_to == link->node_from)
1185 					cpu_link->flags = cpu_flag;
1186 		}
1187 	}
1188 }
1189 
1190 int kfd_topology_add_device(struct kfd_dev *gpu)
1191 {
1192 	uint32_t gpu_id;
1193 	struct kfd_topology_device *dev;
1194 	struct kfd_cu_info cu_info;
1195 	int res = 0;
1196 	struct list_head temp_topology_device_list;
1197 	void *crat_image = NULL;
1198 	size_t image_size = 0;
1199 	int proximity_domain;
1200 
1201 	INIT_LIST_HEAD(&temp_topology_device_list);
1202 
1203 	gpu_id = kfd_generate_gpu_id(gpu);
1204 
1205 	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
1206 
1207 	proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
1208 
1209 	/* Check to see if this gpu device exists in the topology_device_list.
1210 	 * If so, assign the gpu to that device,
1211 	 * else create a Virtual CRAT for this gpu device and then parse that
1212 	 * CRAT to create a new topology device. Once created assign the gpu to
1213 	 * that topology device
1214 	 */
1215 	dev = kfd_assign_gpu(gpu);
1216 	if (!dev) {
1217 		res = kfd_create_crat_image_virtual(&crat_image, &image_size,
1218 						    COMPUTE_UNIT_GPU, gpu,
1219 						    proximity_domain);
1220 		if (res) {
1221 			pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
1222 			       gpu_id);
1223 			return res;
1224 		}
1225 		res = kfd_parse_crat_table(crat_image,
1226 					   &temp_topology_device_list,
1227 					   proximity_domain);
1228 		if (res) {
1229 			pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
1230 			       gpu_id);
1231 			goto err;
1232 		}
1233 
1234 		down_write(&topology_lock);
1235 		kfd_topology_update_device_list(&temp_topology_device_list,
1236 			&topology_device_list);
1237 
1238 		/* Update the SYSFS tree, since we added another topology
1239 		 * device
1240 		 */
1241 		res = kfd_topology_update_sysfs();
1242 		up_write(&topology_lock);
1243 
1244 		if (!res)
1245 			sys_props.generation_count++;
1246 		else
1247 			pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
1248 						gpu_id, res);
1249 		dev = kfd_assign_gpu(gpu);
1250 		if (WARN_ON(!dev)) {
1251 			res = -ENODEV;
1252 			goto err;
1253 		}
1254 	}
1255 
1256 	dev->gpu_id = gpu_id;
1257 	gpu->id = gpu_id;
1258 
1259 	/* TODO: Move the following lines to function
1260 	 *	kfd_add_non_crat_information
1261 	 */
1262 
1263 	/* Fill-in additional information that is not available in CRAT but
1264 	 * needed for the topology
1265 	 */
1266 
1267 	amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
1268 	dev->node_props.simd_arrays_per_engine =
1269 		cu_info.num_shader_arrays_per_engine;
1270 
1271 	dev->node_props.vendor_id = gpu->pdev->vendor;
1272 	dev->node_props.device_id = gpu->pdev->device;
1273 	dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
1274 		gpu->pdev->devfn);
1275 	dev->node_props.max_engine_clk_fcompute =
1276 		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
1277 	dev->node_props.max_engine_clk_ccompute =
1278 		cpufreq_quick_get_max(0) / 1000;
1279 	dev->node_props.drm_render_minor =
1280 		gpu->shared_resources.drm_render_minor;
1281 
1282 	dev->node_props.hive_id = gpu->hive_id;
1283 
1284 	kfd_fill_mem_clk_max_info(dev);
1285 	kfd_fill_iolink_non_crat_info(dev);
1286 
1287 	switch (dev->gpu->device_info->asic_family) {
1288 	case CHIP_KAVERI:
1289 	case CHIP_HAWAII:
1290 	case CHIP_TONGA:
1291 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
1292 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1293 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1294 		break;
1295 	case CHIP_CARRIZO:
1296 	case CHIP_FIJI:
1297 	case CHIP_POLARIS10:
1298 	case CHIP_POLARIS11:
1299 	case CHIP_POLARIS12:
1300 		pr_debug("Adding doorbell packet type capability\n");
1301 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
1302 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1303 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1304 		break;
1305 	case CHIP_VEGA10:
1306 	case CHIP_VEGA12:
1307 	case CHIP_VEGA20:
1308 	case CHIP_RAVEN:
1309 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
1310 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1311 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1312 		break;
1313 	default:
1314 		WARN(1, "Unexpected ASIC family %u",
1315 		     dev->gpu->device_info->asic_family);
1316 	}
1317 
1318 	/* Fix errors in CZ CRAT.
1319 	 * simd_count: Carrizo CRAT reports wrong simd_count, probably
1320 	 *		because it doesn't consider masked out CUs
1321 	 * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
1322 	 * capability flag: Carrizo CRAT doesn't report IOMMU flags
1323 	 */
1324 	if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
1325 		dev->node_props.simd_count =
1326 			cu_info.simd_per_cu * cu_info.cu_active_number;
1327 		dev->node_props.max_waves_per_simd = 10;
1328 		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
1329 	}
1330 
1331 	kfd_debug_print_topology();
1332 
1333 	if (!res)
1334 		kfd_notify_gpu_change(gpu_id, 1);
1335 err:
1336 	kfd_destroy_crat_image(crat_image);
1337 	return res;
1338 }
1339 
1340 int kfd_topology_remove_device(struct kfd_dev *gpu)
1341 {
1342 	struct kfd_topology_device *dev, *tmp;
1343 	uint32_t gpu_id;
1344 	int res = -ENODEV;
1345 
1346 	down_write(&topology_lock);
1347 
1348 	list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
1349 		if (dev->gpu == gpu) {
1350 			gpu_id = dev->gpu_id;
1351 			kfd_remove_sysfs_node_entry(dev);
1352 			kfd_release_topology_device(dev);
1353 			sys_props.num_devices--;
1354 			res = 0;
1355 			if (kfd_topology_update_sysfs() < 0)
1356 				kfd_topology_release_sysfs();
1357 			break;
1358 		}
1359 
1360 	up_write(&topology_lock);
1361 
1362 	if (!res)
1363 		kfd_notify_gpu_change(gpu_id, 0);
1364 
1365 	return res;
1366 }
1367 
1368 /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
1369  *	topology. If GPU device is found @idx, then valid kfd_dev pointer is
1370  *	returned through @kdev
1371  * Return -	0: On success (@kdev will be NULL for non GPU nodes)
1372  *		-1: If end of list
1373  */
1374 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
1375 {
1376 
1377 	struct kfd_topology_device *top_dev;
1378 	uint8_t device_idx = 0;
1379 
1380 	*kdev = NULL;
1381 	down_read(&topology_lock);
1382 
1383 	list_for_each_entry(top_dev, &topology_device_list, list) {
1384 		if (device_idx == idx) {
1385 			*kdev = top_dev->gpu;
1386 			up_read(&topology_lock);
1387 			return 0;
1388 		}
1389 
1390 		device_idx++;
1391 	}
1392 
1393 	up_read(&topology_lock);
1394 
1395 	return -1;
1396 
1397 }
1398 
1399 static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
1400 {
1401 	int first_cpu_of_numa_node;
1402 
1403 	if (!cpumask || cpumask == cpu_none_mask)
1404 		return -1;
1405 	first_cpu_of_numa_node = cpumask_first(cpumask);
1406 	if (first_cpu_of_numa_node >= nr_cpu_ids)
1407 		return -1;
1408 #ifdef CONFIG_X86_64
1409 	return cpu_data(first_cpu_of_numa_node).apicid;
1410 #else
1411 	return first_cpu_of_numa_node;
1412 #endif
1413 }
1414 
1415 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
1416  *	of the given NUMA node (numa_node_id)
1417  * Return -1 on failure
1418  */
1419 int kfd_numa_node_to_apic_id(int numa_node_id)
1420 {
1421 	if (numa_node_id == -1) {
1422 		pr_warn("Invalid NUMA Node. Use online CPU mask\n");
1423 		return kfd_cpumask_to_apic_id(cpu_online_mask);
1424 	}
1425 	return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
1426 }
1427 
1428 #if defined(CONFIG_DEBUG_FS)
1429 
1430 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
1431 {
1432 	struct kfd_topology_device *dev;
1433 	unsigned int i = 0;
1434 	int r = 0;
1435 
1436 	down_read(&topology_lock);
1437 
1438 	list_for_each_entry(dev, &topology_device_list, list) {
1439 		if (!dev->gpu) {
1440 			i++;
1441 			continue;
1442 		}
1443 
1444 		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1445 		r = dqm_debugfs_hqds(m, dev->gpu->dqm);
1446 		if (r)
1447 			break;
1448 	}
1449 
1450 	up_read(&topology_lock);
1451 
1452 	return r;
1453 }
1454 
1455 int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
1456 {
1457 	struct kfd_topology_device *dev;
1458 	unsigned int i = 0;
1459 	int r = 0;
1460 
1461 	down_read(&topology_lock);
1462 
1463 	list_for_each_entry(dev, &topology_device_list, list) {
1464 		if (!dev->gpu) {
1465 			i++;
1466 			continue;
1467 		}
1468 
1469 		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1470 		r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
1471 		if (r)
1472 			break;
1473 	}
1474 
1475 	up_read(&topology_lock);
1476 
1477 	return r;
1478 }
1479 
1480 #endif
1481