Lines Matching +full:gfx +full:- +full:mem

1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
46 /* topology_device_list - Master list of all topology devices */
60 if (top_dev->proximity_domain == proximity_domain) { in kfd_topology_device_by_proximity_domain_no_lock()
90 if (top_dev->gpu_id == gpu_id) { in kfd_topology_device_by_id()
108 return top_dev->gpu; in kfd_device_by_id()
114 struct kfd_mem_properties *mem; in kfd_release_topology_device() local
120 list_del(&dev->list); in kfd_release_topology_device()
122 while (dev->mem_props.next != &dev->mem_props) { in kfd_release_topology_device()
123 mem = container_of(dev->mem_props.next, in kfd_release_topology_device()
125 list_del(&mem->list); in kfd_release_topology_device()
126 kfree(mem); in kfd_release_topology_device()
129 while (dev->cache_props.next != &dev->cache_props) { in kfd_release_topology_device()
130 cache = container_of(dev->cache_props.next, in kfd_release_topology_device()
132 list_del(&cache->list); in kfd_release_topology_device()
136 while (dev->io_link_props.next != &dev->io_link_props) { in kfd_release_topology_device()
137 iolink = container_of(dev->io_link_props.next, in kfd_release_topology_device()
139 list_del(&iolink->list); in kfd_release_topology_device()
143 while (dev->p2p_link_props.next != &dev->p2p_link_props) { in kfd_release_topology_device()
144 p2plink = container_of(dev->p2p_link_props.next, in kfd_release_topology_device()
146 list_del(&p2plink->list); in kfd_release_topology_device()
150 while (dev->perf_props.next != &dev->perf_props) { in kfd_release_topology_device()
151 perf = container_of(dev->perf_props.next, in kfd_release_topology_device()
153 list_del(&perf->list); in kfd_release_topology_device()
188 INIT_LIST_HEAD(&dev->mem_props); in kfd_create_topology_device()
189 INIT_LIST_HEAD(&dev->cache_props); in kfd_create_topology_device()
190 INIT_LIST_HEAD(&dev->io_link_props); in kfd_create_topology_device()
191 INIT_LIST_HEAD(&dev->p2p_link_props); in kfd_create_topology_device()
192 INIT_LIST_HEAD(&dev->perf_props); in kfd_create_topology_device()
194 list_add_tail(&dev->list, device_list); in kfd_create_topology_device()
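kfd_create_topology_device() seeds each per-device property list with INIT_LIST_HEAD() and kfd_release_topology_device() drains them by popping the first entry until the head points back at itself. A minimal, runnable userspace sketch of that intrusive-list pattern; the struct demo_props name and the hand-rolled helpers stand in for the kernel's <linux/list.h>:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stddef.h>

    struct list_head { struct list_head *next, *prev; };

    /* recover the containing struct from a pointer to its list member */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

    static void list_add_tail(struct list_head *n, struct list_head *h)
    {
        n->prev = h->prev; n->next = h;
        h->prev->next = n; h->prev = n;
    }

    static void list_del(struct list_head *n)
    {
        n->prev->next = n->next; n->next->prev = n->prev;
    }

    struct demo_props { int id; struct list_head list; };

    int main(void)
    {
        struct list_head props;
        INIT_LIST_HEAD(&props);

        for (int i = 0; i < 3; i++) {
            struct demo_props *p = malloc(sizeof(*p));
            p->id = i;
            list_add_tail(&p->list, &props);
        }

        /* drain exactly like kfd_release_topology_device(): pop the first
         * entry until the head points at itself */
        while (props.next != &props) {
            struct demo_props *p =
                container_of(props.next, struct demo_props, list);
            list_del(&p->list);
            printf("freeing props %d\n", p->id);
            free(p);
        }
        return 0;
    }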
201 (offs += snprintf(buffer+offs, PAGE_SIZE-offs, \
231 offs = -EINVAL; in sysprops_show()
261 if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) in iolink_show()
262 return -EPERM; in iolink_show()
263 sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type); in iolink_show()
264 sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj); in iolink_show()
265 sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min); in iolink_show()
266 sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from); in iolink_show()
267 sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to); in iolink_show()
268 sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight); in iolink_show()
269 sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency); in iolink_show()
270 sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency); in iolink_show()
272 iolink->min_bandwidth); in iolink_show()
274 iolink->max_bandwidth); in iolink_show()
276 iolink->rec_transfer_size); in iolink_show()
278 iolink->rec_sdma_eng_id_mask); in iolink_show()
279 sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); in iolink_show()
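Each *_show() handler above appends one "name value\n" pair per property into a single page-sized sysfs buffer, with offs tracking the write position; that is what the sysfs_show_32bit_prop() macro fragment above wraps. A small runnable userspace sketch of the same accumulation, assuming a 4 KiB page and illustrative property values:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096

    /* append "name value\n" at offs and advance offs by what was written */
    #define sysfs_show_32bit_prop(buffer, offs, name, value) \
            ((offs) += snprintf((buffer) + (offs), PAGE_SIZE - (offs), \
                                "%s %u\n", (name), (uint32_t)(value)))

    int main(void)
    {
        char buffer[PAGE_SIZE];
        int offs = 0;

        sysfs_show_32bit_prop(buffer, offs, "type", 2);
        sysfs_show_32bit_prop(buffer, offs, "weight", 20);
        sysfs_show_32bit_prop(buffer, offs, "flags", 0x1);

        fputs(buffer, stdout);  /* what a read of a properties file returns */
        return 0;
    }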
297 struct kfd_mem_properties *mem; in mem_show() local
302 mem = container_of(attr, struct kfd_mem_properties, attr); in mem_show()
303 if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) in mem_show()
304 return -EPERM; in mem_show()
305 sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type); in mem_show()
307 mem->size_in_bytes); in mem_show()
308 sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags); in mem_show()
309 sysfs_show_32bit_prop(buffer, offs, "width", mem->width); in mem_show()
311 mem->mem_clk_max); in mem_show()
335 if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) in kfd_cache_show()
336 return -EPERM; in kfd_cache_show()
338 cache->processor_id_low); in kfd_cache_show()
339 sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level); in kfd_cache_show()
340 sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size); in kfd_cache_show()
342 cache->cacheline_size); in kfd_cache_show()
344 cache->cachelines_per_tag); in kfd_cache_show()
345 sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); in kfd_cache_show()
346 sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); in kfd_cache_show()
347 sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); in kfd_cache_show()
349 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); in kfd_cache_show()
350 for (i = 0; i < cache->sibling_map_size; i++) in kfd_cache_show()
351 for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) in kfd_cache_show()
353 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", in kfd_cache_show()
354 (cache->sibling_map[i] >> j) & 1); in kfd_cache_show()
357 buffer[offs-1] = '\n'; in kfd_cache_show()
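The sibling_map emission in kfd_cache_show() prints every bit of every map byte comma-separated, then overwrites the final comma with a newline via buffer[offs-1]. The same trick as a runnable userspace sketch, with an illustrative two-byte map:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t sibling_map[] = { 0x0f, 0x00 };   /* illustrative map */
        char buffer[256];
        int offs = 0;

        offs += snprintf(buffer + offs, sizeof(buffer) - offs, "sibling_map ");
        for (size_t i = 0; i < sizeof(sibling_map); i++)
            for (size_t j = 0; j < 8; j++)
                offs += snprintf(buffer + offs, sizeof(buffer) - offs,
                                 "%d,", (sibling_map[i] >> j) & 1);
        buffer[offs - 1] = '\n';                  /* replace trailing ',' */
        fputs(buffer, stdout);
        return 0;
    }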
385 if (!attr->data) /* invalid data for PMC */ in perf_show()
388 return sysfs_show_32bit_val(buf, offs, attr->data); in perf_show()
414 if (strcmp(attr->name, "gpu_id") == 0) { in node_show()
417 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
418 return -EPERM; in node_show()
419 return sysfs_show_32bit_val(buffer, offs, dev->gpu_id); in node_show()
422 if (strcmp(attr->name, "name") == 0) { in node_show()
426 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
427 return -EPERM; in node_show()
428 return sysfs_show_str_val(buffer, offs, dev->node_props.name); in node_show()
433 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
434 return -EPERM; in node_show()
436 dev->node_props.cpu_cores_count); in node_show()
438 dev->gpu ? dev->node_props.simd_count : 0); in node_show()
440 dev->node_props.mem_banks_count); in node_show()
442 dev->node_props.caches_count); in node_show()
444 dev->node_props.io_links_count); in node_show()
446 dev->node_props.p2p_links_count); in node_show()
448 dev->node_props.cpu_core_id_base); in node_show()
450 dev->node_props.simd_id_base); in node_show()
452 dev->node_props.max_waves_per_simd); in node_show()
454 dev->node_props.lds_size_in_kb); in node_show()
456 dev->node_props.gds_size_in_kb); in node_show()
458 dev->node_props.num_gws); in node_show()
460 dev->node_props.wave_front_size); in node_show()
462 dev->gpu ? (dev->node_props.array_count * in node_show()
463 NUM_XCC(dev->gpu->xcc_mask)) : 0); in node_show()
465 dev->node_props.simd_arrays_per_engine); in node_show()
467 dev->node_props.cu_per_simd_array); in node_show()
469 dev->node_props.simd_per_cu); in node_show()
471 dev->node_props.max_slots_scratch_cu); in node_show()
473 dev->node_props.gfx_target_version); in node_show()
475 dev->node_props.vendor_id); in node_show()
477 dev->node_props.device_id); in node_show()
479 dev->node_props.location_id); in node_show()
481 dev->node_props.domain); in node_show()
483 dev->node_props.drm_render_minor); in node_show()
485 dev->node_props.hive_id); in node_show()
487 dev->node_props.num_sdma_engines); in node_show()
489 dev->node_props.num_sdma_xgmi_engines); in node_show()
491 dev->node_props.num_sdma_queues_per_engine); in node_show()
493 dev->node_props.num_cp_queues); in node_show()
495 if (dev->gpu) { in node_show()
497 __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); in node_show()
500 dev->node_props.capability |= in node_show()
503 dev->node_props.capability |= in node_show()
509 if (dev->gpu->adev->asic_type == CHIP_TONGA) in node_show()
510 dev->node_props.capability |= in node_show()
513 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) && in node_show()
514 (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) in node_show()
515 dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; in node_show()
518 dev->node_props.max_engine_clk_fcompute); in node_show()
523 dev->gpu->kfd->mec_fw_version); in node_show()
525 dev->node_props.capability); in node_show()
527 dev->node_props.capability2); in node_show()
529 dev->node_props.debug_prop); in node_show()
531 dev->gpu->kfd->sdma_fw_version); in node_show()
533 dev->gpu->adev->unique_id); in node_show()
535 NUM_XCC(dev->gpu->xcc_mask)); in node_show()
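Besides plain properties, node_show() packs derived values into the capability word, e.g. encoding the watch-point count as __ilog2_u32() of num_of_watch_points shifted into a bitfield. A hedged sketch of that packing; the CAP_* shift/mask constants below are illustrative stand-ins, not the driver's HSA_CAP_* values:

    #include <stdio.h>
    #include <stdint.h>

    #define CAP_WATCH_POINTS_SHIFT  8                       /* illustrative */
    #define CAP_WATCH_POINTS_MASK   (0xfu << CAP_WATCH_POINTS_SHIFT)

    static uint32_t ilog2_u32(uint32_t v)
    {
        return 31 - __builtin_clz(v);   /* v must be non-zero */
    }

    int main(void)
    {
        uint32_t capability = 0, num_of_watch_points = 4;

        capability |= (ilog2_u32(num_of_watch_points)
                       << CAP_WATCH_POINTS_SHIFT) & CAP_WATCH_POINTS_MASK;
        printf("capability = 0x%08x\n", capability);
        return 0;
    }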
563 struct kfd_mem_properties *mem; in kfd_remove_sysfs_node_entry() local
566 if (dev->kobj_iolink) { in kfd_remove_sysfs_node_entry()
567 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_remove_sysfs_node_entry()
568 if (iolink->kobj) { in kfd_remove_sysfs_node_entry()
569 kfd_remove_sysfs_file(iolink->kobj, in kfd_remove_sysfs_node_entry()
570 &iolink->attr); in kfd_remove_sysfs_node_entry()
571 iolink->kobj = NULL; in kfd_remove_sysfs_node_entry()
573 kobject_del(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
574 kobject_put(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
575 dev->kobj_iolink = NULL; in kfd_remove_sysfs_node_entry()
578 if (dev->kobj_p2plink) { in kfd_remove_sysfs_node_entry()
579 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_remove_sysfs_node_entry()
580 if (p2plink->kobj) { in kfd_remove_sysfs_node_entry()
581 kfd_remove_sysfs_file(p2plink->kobj, in kfd_remove_sysfs_node_entry()
582 &p2plink->attr); in kfd_remove_sysfs_node_entry()
583 p2plink->kobj = NULL; in kfd_remove_sysfs_node_entry()
585 kobject_del(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
586 kobject_put(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
587 dev->kobj_p2plink = NULL; in kfd_remove_sysfs_node_entry()
590 if (dev->kobj_cache) { in kfd_remove_sysfs_node_entry()
591 list_for_each_entry(cache, &dev->cache_props, list) in kfd_remove_sysfs_node_entry()
592 if (cache->kobj) { in kfd_remove_sysfs_node_entry()
593 kfd_remove_sysfs_file(cache->kobj, in kfd_remove_sysfs_node_entry()
594 &cache->attr); in kfd_remove_sysfs_node_entry()
595 cache->kobj = NULL; in kfd_remove_sysfs_node_entry()
597 kobject_del(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
598 kobject_put(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
599 dev->kobj_cache = NULL; in kfd_remove_sysfs_node_entry()
602 if (dev->kobj_mem) { in kfd_remove_sysfs_node_entry()
603 list_for_each_entry(mem, &dev->mem_props, list) in kfd_remove_sysfs_node_entry()
604 if (mem->kobj) { in kfd_remove_sysfs_node_entry()
605 kfd_remove_sysfs_file(mem->kobj, &mem->attr); in kfd_remove_sysfs_node_entry()
606 mem->kobj = NULL; in kfd_remove_sysfs_node_entry()
608 kobject_del(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
609 kobject_put(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
610 dev->kobj_mem = NULL; in kfd_remove_sysfs_node_entry()
613 if (dev->kobj_perf) { in kfd_remove_sysfs_node_entry()
614 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_remove_sysfs_node_entry()
615 kfree(perf->attr_group); in kfd_remove_sysfs_node_entry()
616 perf->attr_group = NULL; in kfd_remove_sysfs_node_entry()
618 kobject_del(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
619 kobject_put(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
620 dev->kobj_perf = NULL; in kfd_remove_sysfs_node_entry()
623 if (dev->kobj_node) { in kfd_remove_sysfs_node_entry()
624 sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); in kfd_remove_sysfs_node_entry()
625 sysfs_remove_file(dev->kobj_node, &dev->attr_name); in kfd_remove_sysfs_node_entry()
626 sysfs_remove_file(dev->kobj_node, &dev->attr_props); in kfd_remove_sysfs_node_entry()
627 kobject_del(dev->kobj_node); in kfd_remove_sysfs_node_entry()
628 kobject_put(dev->kobj_node); in kfd_remove_sysfs_node_entry()
629 dev->kobj_node = NULL; in kfd_remove_sysfs_node_entry()
639 struct kfd_mem_properties *mem; in kfd_build_sysfs_node_entry() local
645 if (WARN_ON(dev->kobj_node)) in kfd_build_sysfs_node_entry()
646 return -EEXIST; in kfd_build_sysfs_node_entry()
651 dev->kobj_node = kfd_alloc_struct(dev->kobj_node); in kfd_build_sysfs_node_entry()
652 if (!dev->kobj_node) in kfd_build_sysfs_node_entry()
653 return -ENOMEM; in kfd_build_sysfs_node_entry()
655 ret = kobject_init_and_add(dev->kobj_node, &node_type, in kfd_build_sysfs_node_entry()
658 kobject_put(dev->kobj_node); in kfd_build_sysfs_node_entry()
662 dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); in kfd_build_sysfs_node_entry()
663 if (!dev->kobj_mem) in kfd_build_sysfs_node_entry()
664 return -ENOMEM; in kfd_build_sysfs_node_entry()
666 dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); in kfd_build_sysfs_node_entry()
667 if (!dev->kobj_cache) in kfd_build_sysfs_node_entry()
668 return -ENOMEM; in kfd_build_sysfs_node_entry()
670 dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
671 if (!dev->kobj_iolink) in kfd_build_sysfs_node_entry()
672 return -ENOMEM; in kfd_build_sysfs_node_entry()
674 dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
675 if (!dev->kobj_p2plink) in kfd_build_sysfs_node_entry()
676 return -ENOMEM; in kfd_build_sysfs_node_entry()
678 dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); in kfd_build_sysfs_node_entry()
679 if (!dev->kobj_perf) in kfd_build_sysfs_node_entry()
680 return -ENOMEM; in kfd_build_sysfs_node_entry()
685 dev->attr_gpuid.name = "gpu_id"; in kfd_build_sysfs_node_entry()
686 dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
687 sysfs_attr_init(&dev->attr_gpuid); in kfd_build_sysfs_node_entry()
688 dev->attr_name.name = "name"; in kfd_build_sysfs_node_entry()
689 dev->attr_name.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
690 sysfs_attr_init(&dev->attr_name); in kfd_build_sysfs_node_entry()
691 dev->attr_props.name = "properties"; in kfd_build_sysfs_node_entry()
692 dev->attr_props.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
693 sysfs_attr_init(&dev->attr_props); in kfd_build_sysfs_node_entry()
694 ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); in kfd_build_sysfs_node_entry()
697 ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); in kfd_build_sysfs_node_entry()
700 ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); in kfd_build_sysfs_node_entry()
705 list_for_each_entry(mem, &dev->mem_props, list) { in kfd_build_sysfs_node_entry()
706 mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
707 if (!mem->kobj) in kfd_build_sysfs_node_entry()
708 return -ENOMEM; in kfd_build_sysfs_node_entry()
709 ret = kobject_init_and_add(mem->kobj, &mem_type, in kfd_build_sysfs_node_entry()
710 dev->kobj_mem, "%d", i); in kfd_build_sysfs_node_entry()
712 kobject_put(mem->kobj); in kfd_build_sysfs_node_entry()
716 mem->attr.name = "properties"; in kfd_build_sysfs_node_entry()
717 mem->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
718 sysfs_attr_init(&mem->attr); in kfd_build_sysfs_node_entry()
719 ret = sysfs_create_file(mem->kobj, &mem->attr); in kfd_build_sysfs_node_entry()
726 list_for_each_entry(cache, &dev->cache_props, list) { in kfd_build_sysfs_node_entry()
727 cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
728 if (!cache->kobj) in kfd_build_sysfs_node_entry()
729 return -ENOMEM; in kfd_build_sysfs_node_entry()
730 ret = kobject_init_and_add(cache->kobj, &cache_type, in kfd_build_sysfs_node_entry()
731 dev->kobj_cache, "%d", i); in kfd_build_sysfs_node_entry()
733 kobject_put(cache->kobj); in kfd_build_sysfs_node_entry()
737 cache->attr.name = "properties"; in kfd_build_sysfs_node_entry()
738 cache->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
739 sysfs_attr_init(&cache->attr); in kfd_build_sysfs_node_entry()
740 ret = sysfs_create_file(cache->kobj, &cache->attr); in kfd_build_sysfs_node_entry()
747 list_for_each_entry(iolink, &dev->io_link_props, list) { in kfd_build_sysfs_node_entry()
748 iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
749 if (!iolink->kobj) in kfd_build_sysfs_node_entry()
750 return -ENOMEM; in kfd_build_sysfs_node_entry()
751 ret = kobject_init_and_add(iolink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
752 dev->kobj_iolink, "%d", i); in kfd_build_sysfs_node_entry()
754 kobject_put(iolink->kobj); in kfd_build_sysfs_node_entry()
758 iolink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
759 iolink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
760 sysfs_attr_init(&iolink->attr); in kfd_build_sysfs_node_entry()
761 ret = sysfs_create_file(iolink->kobj, &iolink->attr); in kfd_build_sysfs_node_entry()
768 list_for_each_entry(p2plink, &dev->p2p_link_props, list) { in kfd_build_sysfs_node_entry()
769 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
770 if (!p2plink->kobj) in kfd_build_sysfs_node_entry()
771 return -ENOMEM; in kfd_build_sysfs_node_entry()
772 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
773 dev->kobj_p2plink, "%d", i); in kfd_build_sysfs_node_entry()
775 kobject_put(p2plink->kobj); in kfd_build_sysfs_node_entry()
779 p2plink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
780 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
781 sysfs_attr_init(&p2plink->attr); in kfd_build_sysfs_node_entry()
782 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_sysfs_node_entry()
790 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_build_sysfs_node_entry()
791 perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) in kfd_build_sysfs_node_entry()
794 if (!perf->attr_group) in kfd_build_sysfs_node_entry()
795 return -ENOMEM; in kfd_build_sysfs_node_entry()
797 attrs = (struct attribute **)(perf->attr_group + 1); in kfd_build_sysfs_node_entry()
798 if (!strcmp(perf->block_name, "iommu")) { in kfd_build_sysfs_node_entry()
803 perf_attr_iommu[0].data = perf->max_concurrent; in kfd_build_sysfs_node_entry()
807 perf->attr_group->name = perf->block_name; in kfd_build_sysfs_node_entry()
808 perf->attr_group->attrs = attrs; in kfd_build_sysfs_node_entry()
809 ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); in kfd_build_sysfs_node_entry()
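Every kobject built above follows the same lifecycle: kzalloc, kobject_init_and_add(), and on failure kobject_put() rather than kfree(), so the ktype's release hook frees the memory. A minimal module-style sketch of that rule, assuming illustrative demo_* names; the kobject calls themselves are the real <linux/kobject.h> API:

    #include <linux/kobject.h>
    #include <linux/module.h>
    #include <linux/slab.h>

    static struct kobject *demo_kobj;

    static void demo_release(struct kobject *kobj)
    {
        kfree(kobj);            /* runs when the last reference drops */
    }

    static const struct kobj_type demo_ktype = {
        .release   = demo_release,
        .sysfs_ops = &kobj_sysfs_ops,
    };

    static int __init demo_init(void)
    {
        int ret;

        demo_kobj = kzalloc(sizeof(*demo_kobj), GFP_KERNEL);
        if (!demo_kobj)
            return -ENOMEM;

        ret = kobject_init_and_add(demo_kobj, &demo_ktype, kernel_kobj,
                                   "%s", "demo_node");
        if (ret)
            kobject_put(demo_kobj);   /* not kfree(): release() frees it */
        return ret;
    }

    static void __exit demo_exit(void)
    {
        kobject_del(demo_kobj);
        kobject_put(demo_kobj);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");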
851 return -ENOMEM; in kfd_topology_update_sysfs()
854 &sysprops_type, &kfd_device->kobj, in kfd_topology_update_sysfs()
864 return -ENOMEM; in kfd_topology_update_sysfs()
912 list_move_tail(temp_list->next, master_list); in kfd_topology_update_device_list()
926 if (dev->node_props.cpu_cores_count && in kfd_debug_print_topology()
927 dev->node_props.simd_count) { in kfd_debug_print_topology()
929 dev->node_props.device_id, in kfd_debug_print_topology()
930 dev->node_props.vendor_id); in kfd_debug_print_topology()
931 } else if (dev->node_props.cpu_cores_count) in kfd_debug_print_topology()
933 else if (dev->node_props.simd_count) in kfd_debug_print_topology()
935 dev->node_props.device_id, in kfd_debug_print_topology()
936 dev->node_props.vendor_id); in kfd_debug_print_topology()
952 sys_props.platform_id = dev->oem_id64; in kfd_update_system_properties()
953 sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); in kfd_update_system_properties()
954 sys_props.platform_rev = dev->oem_revision; in kfd_update_system_properties()
962 struct kfd_mem_properties *mem; in find_system_memory() local
966 if (memdev->header.type != DMI_ENTRY_MEM_DEVICE) in find_system_memory()
968 if (memdev->header.length < sizeof(struct dmi_mem_device)) in find_system_memory()
971 list_for_each_entry(mem, &kdev->mem_props, list) { in find_system_memory()
972 if (memdev->total_width != 0xFFFF && memdev->total_width != 0) in find_system_memory()
973 mem->width = memdev->total_width; in find_system_memory()
974 if (memdev->speed != 0) in find_system_memory()
975 mem->mem_clk_max = memdev->speed; in find_system_memory()
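find_system_memory() is written as a DMI decode callback: it ignores everything except type-17 (Memory Device) records and copies width/speed into the matching memory bank. A hedged sketch of how such a callback plugs into dmi_walk(); struct demo_ctx is illustrative, while dmi_walk() and struct dmi_header are the real <linux/dmi.h> interfaces:

    #include <linux/dmi.h>

    struct demo_ctx {
        unsigned int mem_devices;
    };

    /* dmi_walk() invokes this once per DMI table record */
    static void demo_decode(const struct dmi_header *dh, void *priv)
    {
        struct demo_ctx *ctx = priv;

        if (dh->type != DMI_ENTRY_MEM_DEVICE)
            return;                 /* only SMBIOS type-17 records */
        ctx->mem_devices++;
    }

    static int demo_count_mem_devices(void)
    {
        struct demo_ctx ctx = { 0 };
        int ret = dmi_walk(demo_decode, &ctx);

        return ret ? ret : ctx.mem_devices;
    }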
979 /* kfd_add_non_crat_information - Add information that is not currently
981 * @dev - topology device to which additional info is added
986 if (!kdev->gpu) { in kfd_add_non_crat_information()
1003 /* topology_device_list - Master list of all topology devices in kfd_topology_init()
1004 * temp_topology_device_list - temporary list created while parsing CRAT in kfd_topology_init()
1046 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_init()
1095 local_mem_size = gpu->local_mem_info.local_mem_size_private + in kfd_generate_gpu_id()
1096 gpu->local_mem_info.local_mem_size_public; in kfd_generate_gpu_id()
1097 buf[0] = gpu->adev->pdev->devfn; in kfd_generate_gpu_id()
1098 buf[1] = gpu->adev->pdev->subsystem_vendor | in kfd_generate_gpu_id()
1099 (gpu->adev->pdev->subsystem_device << 16); in kfd_generate_gpu_id()
1100 buf[2] = pci_domain_nr(gpu->adev->pdev->bus); in kfd_generate_gpu_id()
1101 buf[3] = gpu->adev->pdev->device; in kfd_generate_gpu_id()
1102 buf[4] = gpu->adev->pdev->bus->number; in kfd_generate_gpu_id()
1105 buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); in kfd_generate_gpu_id()
1108 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); in kfd_generate_gpu_id()
1112 * that the value could be 0 or non-unique. So, check if in kfd_generate_gpu_id()
1113 * it is unique and non-zero. If not unique, increment until in kfd_generate_gpu_id()
1123 if (dev->gpu && dev->gpu_id == gpu_id) { in kfd_generate_gpu_id()
1130 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); in kfd_generate_gpu_id()
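kfd_generate_gpu_id() folds PCI and topology identifiers into a hash, masks it to KFD_GPU_ID_HASH_WIDTH bits, then probes upward until the id is both non-zero and unused. A runnable userspace sketch of that scheme; the fold function and sample values are illustrative:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    #define HASH_WIDTH 16   /* stand-in for KFD_GPU_ID_HASH_WIDTH */

    static uint32_t fold(const uint32_t *buf, size_t n)
    {
        uint32_t h = 0;
        while (n--)
            h = h * 31 + *buf++;
        return h & ((1u << HASH_WIDTH) - 1);
    }

    static int in_use(uint32_t id, const uint32_t *used, size_t n)
    {
        for (size_t i = 0; i < n; i++)
            if (used[i] == id)
                return 1;
        return 0;
    }

    int main(void)
    {
        uint32_t buf[4] = { 0x1002, 0x744c, 0x0100, 1 };  /* fake PCI ids */
        uint32_t used[] = { 0x0000, 0x2b67 };             /* already taken */
        uint32_t id = fold(buf, 4);

        /* the hash may be 0 or collide, so probe: increment (wrapping
         * inside the mask) until unique and non-zero */
        while (id == 0 || in_use(id, used, 2))
            id = (id + 1) & ((1u << HASH_WIDTH) - 1);

        printf("gpu_id = 0x%x\n", id);
        return 0;
    }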
1136 /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1145 struct kfd_mem_properties *mem; in kfd_assign_gpu() local
1154 if (dev->node_props.cpu_cores_count) in kfd_assign_gpu()
1157 if (!dev->gpu && (dev->node_props.simd_count > 0)) { in kfd_assign_gpu()
1158 dev->gpu = gpu; in kfd_assign_gpu()
1161 list_for_each_entry(mem, &dev->mem_props, list) in kfd_assign_gpu()
1162 mem->gpu = dev->gpu; in kfd_assign_gpu()
1163 list_for_each_entry(cache, &dev->cache_props, list) in kfd_assign_gpu()
1164 cache->gpu = dev->gpu; in kfd_assign_gpu()
1165 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_assign_gpu()
1166 iolink->gpu = dev->gpu; in kfd_assign_gpu()
1167 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_assign_gpu()
1168 p2plink->gpu = dev->gpu; in kfd_assign_gpu()
1183 /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1188 struct kfd_mem_properties *mem; in kfd_fill_mem_clk_max_info() local
1196 * for dGPUs - VCRAT reports only one bank of Local Memory in kfd_fill_mem_clk_max_info()
1197 * for APUs - If CRAT from ACPI reports more than one bank, then in kfd_fill_mem_clk_max_info()
1200 amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, in kfd_fill_mem_clk_max_info()
1201 dev->gpu->xcp); in kfd_fill_mem_clk_max_info()
1203 list_for_each_entry(mem, &dev->mem_props, list) in kfd_fill_mem_clk_max_info()
1204 mem->mem_clk_max = local_mem_info.mem_clk_max; in kfd_fill_mem_clk_max_info()
1212 if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) in kfd_set_iolink_no_atomics()
1219 pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev, in kfd_set_iolink_no_atomics()
1224 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
1228 if (!dev->gpu->kfd->pci_atomic_requested || in kfd_set_iolink_no_atomics()
1229 dev->gpu->adev->asic_type == CHIP_HAWAII) in kfd_set_iolink_no_atomics()
1230 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
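kfd_set_iolink_no_atomics() decides the NO_ATOMICS flags by reading the peer's PCIe Device Capabilities 2 register and checking the 32- and 64-bit AtomicOp completer bits. A hedged kernel-side sketch of that probe; the pci_regs.h names and pcie_capability_read_dword() are real, the wrapper function is illustrative:

    #include <linux/pci.h>

    /* true if @pdev completes both 32- and 64-bit PCIe AtomicOps */
    static bool demo_pcie_atomics_supported(struct pci_dev *pdev)
    {
        u32 cap = 0;

        pcie_capability_read_dword(pdev, PCI_EXP_DEVCAP2, &cap);
        return (cap & PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
               (cap & PCI_EXP_DEVCAP2_ATOMIC_COMP64);
    }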
1239 /* CPU -> GPU with PCIe */ in kfd_set_iolink_non_coherent()
1240 if (!to_dev->gpu && in kfd_set_iolink_non_coherent()
1241 inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) in kfd_set_iolink_non_coherent()
1242 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1244 if (to_dev->gpu) { in kfd_set_iolink_non_coherent()
1245 /* GPU <-> GPU with PCIe and in kfd_set_iolink_non_coherent()
1248 if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || in kfd_set_iolink_non_coherent()
1249 (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && in kfd_set_iolink_non_coherent()
1250 KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { in kfd_set_iolink_non_coherent()
1251 outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1252 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1259 { -1, 14, 12, 2, 4, 8, 10, 6 },
1260 { 14, -1, 2, 10, 8, 4, 6, 12 },
1261 { 10, 2, -1, 12, 14, 6, 4, 8 },
1262 { 2, 12, 10, -1, 6, 14, 8, 4 },
1263 { 4, 8, 14, 6, -1, 10, 12, 2 },
1264 { 8, 4, 6, 14, 12, -1, 2, 10 },
1265 { 10, 6, 4, 8, 12, 2, -1, 14 },
1266 { 6, 12, 8, 4, 2, 10, 14, -1 }};
1272 struct kfd_node *gpu = outbound_link->gpu; in kfd_set_recommended_sdma_engines()
1273 struct amdgpu_device *adev = gpu->adev; in kfd_set_recommended_sdma_engines()
1274 unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; in kfd_set_recommended_sdma_engines()
1277 uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1; in kfd_set_recommended_sdma_engines()
1279 ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines; in kfd_set_recommended_sdma_engines()
1281 bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && in kfd_set_recommended_sdma_engines()
1282 adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && in kfd_set_recommended_sdma_engines()
1283 num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) && in kfd_set_recommended_sdma_engines()
1287 int src_socket_id = adev->gmc.xgmi.physical_node_id; in kfd_set_recommended_sdma_engines()
1288 int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; in kfd_set_recommended_sdma_engines()
1291 outbound_link->rec_sdma_eng_id_mask = in kfd_set_recommended_sdma_engines()
1293 inbound_link->rec_sdma_eng_id_mask = in kfd_set_recommended_sdma_engines()
1297 if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) in kfd_set_recommended_sdma_engines()
1298 outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; in kfd_set_recommended_sdma_engines()
1299 if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) in kfd_set_recommended_sdma_engines()
1300 inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; in kfd_set_recommended_sdma_engines()
1303 uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && in kfd_set_recommended_sdma_engines()
1304 num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask : in kfd_set_recommended_sdma_engines()
1307 outbound_link->rec_sdma_eng_id_mask = engine_mask; in kfd_set_recommended_sdma_engines()
1308 inbound_link->rec_sdma_eng_id_mask = engine_mask; in kfd_set_recommended_sdma_engines()
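kfd_set_recommended_sdma_engines() picks the recommended engine from a per-socket-pair table (the 8x8 matrix above, whose -1 diagonal marks "no link to self") and falls back to the generic XGMI or PCIe mask when the lookup is unsupported. A userspace sketch of the table-driven selection; the 4x4 table contents here are illustrative, not the driver's map:

    #include <stdio.h>
    #include <stdint.h>

    static const int lane_map[4][4] = {    /* illustrative socket-pair map */
        { -1,  2,  4,  6 },
        {  2, -1,  6,  4 },
        {  4,  6, -1,  2 },
        {  6,  4,  2, -1 },
    };

    static uint32_t rec_sdma_eng_mask(int src, int dst)
    {
        int lane = lane_map[src][dst];
        return lane < 0 ? 0 : 1u << lane;  /* one-hot engine mask */
    }

    int main(void)
    {
        printf("mask(0,2) = 0x%x\n", rec_sdma_eng_mask(0, 2));
        return 0;
    }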
1317 if (!dev || !dev->gpu) in kfd_fill_iolink_non_crat_info()
1321 list_for_each_entry(link, &dev->io_link_props, list) { in kfd_fill_iolink_non_crat_info()
1322 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1325 link->node_to); in kfd_fill_iolink_non_crat_info()
1331 if (!peer_dev->gpu && in kfd_fill_iolink_non_crat_info()
1332 link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { in kfd_fill_iolink_non_crat_info()
1337 if (!dev->node_props.hive_id) in kfd_fill_iolink_non_crat_info()
1338 dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev); in kfd_fill_iolink_non_crat_info()
1339 peer_dev->node_props.hive_id = dev->node_props.hive_id; in kfd_fill_iolink_non_crat_info()
1342 list_for_each_entry(inbound_link, &peer_dev->io_link_props, in kfd_fill_iolink_non_crat_info()
1344 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1347 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1355 list_for_each_entry(link, &dev->p2p_link_props, list) { in kfd_fill_iolink_non_crat_info()
1356 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1359 link->node_to); in kfd_fill_iolink_non_crat_info()
1364 list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, in kfd_fill_iolink_non_crat_info()
1366 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1369 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1381 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_p2p_node_entry()
1382 if (!p2plink->kobj) in kfd_build_p2p_node_entry()
1383 return -ENOMEM; in kfd_build_p2p_node_entry()
1385 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_p2p_node_entry()
1386 dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1); in kfd_build_p2p_node_entry()
1388 kobject_put(p2plink->kobj); in kfd_build_p2p_node_entry()
1392 p2plink->attr.name = "properties"; in kfd_build_p2p_node_entry()
1393 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_p2p_node_entry()
1394 sysfs_attr_init(&p2plink->attr); in kfd_build_p2p_node_entry()
1395 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_p2p_node_entry()
1412 if (cpu_dev->gpu) in kfd_create_indirect_link_prop()
1417 if (list_empty(&kdev->io_link_props)) in kfd_create_indirect_link_prop()
1418 return -ENODATA; in kfd_create_indirect_link_prop()
1420 gpu_link = list_first_entry(&kdev->io_link_props, in kfd_create_indirect_link_prop()
1424 /* CPU <--> GPU */ in kfd_create_indirect_link_prop()
1425 if (gpu_link->node_to == i) in kfd_create_indirect_link_prop()
1428 /* find CPU <--> CPU links */ in kfd_create_indirect_link_prop()
1433 &cpu_dev->io_link_props, list) { in kfd_create_indirect_link_prop()
1434 if (tmp_link->node_to == gpu_link->node_to) { in kfd_create_indirect_link_prop()
1442 return -ENOMEM; in kfd_create_indirect_link_prop()
1444 /* CPU <--> CPU <--> GPU, GPU node */ in kfd_create_indirect_link_prop()
1447 return -ENOMEM; in kfd_create_indirect_link_prop()
1450 props->weight = gpu_link->weight + cpu_link->weight; in kfd_create_indirect_link_prop()
1451 props->min_latency = gpu_link->min_latency + cpu_link->min_latency; in kfd_create_indirect_link_prop()
1452 props->max_latency = gpu_link->max_latency + cpu_link->max_latency; in kfd_create_indirect_link_prop()
1453 props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth); in kfd_create_indirect_link_prop()
1454 props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth); in kfd_create_indirect_link_prop()
1456 props->node_from = gpu_node; in kfd_create_indirect_link_prop()
1457 props->node_to = i; in kfd_create_indirect_link_prop()
1458 kdev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1459 list_add_tail(&props->list, &kdev->p2p_link_props); in kfd_create_indirect_link_prop()
1464 /* for small BAR, no CPU --> GPU indirect links */ in kfd_create_indirect_link_prop()
1465 if (kfd_dev_is_large_bar(kdev->gpu)) { in kfd_create_indirect_link_prop()
1466 /* CPU <--> CPU <--> GPU, CPU node */ in kfd_create_indirect_link_prop()
1469 return -ENOMEM; in kfd_create_indirect_link_prop()
1472 props2->node_from = i; in kfd_create_indirect_link_prop()
1473 props2->node_to = gpu_node; in kfd_create_indirect_link_prop()
1474 props2->kobj = NULL; in kfd_create_indirect_link_prop()
1475 cpu_dev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1476 list_add_tail(&props2->list, &cpu_dev->p2p_link_props); in kfd_create_indirect_link_prop()
1495 kdev->gpu->adev, in kfd_add_peer_prop()
1496 peer->gpu->adev)) in kfd_add_peer_prop()
1499 if (list_empty(&kdev->io_link_props)) in kfd_add_peer_prop()
1500 return -ENODATA; in kfd_add_peer_prop()
1502 iolink1 = list_first_entry(&kdev->io_link_props, in kfd_add_peer_prop()
1505 if (list_empty(&peer->io_link_props)) in kfd_add_peer_prop()
1506 return -ENODATA; in kfd_add_peer_prop()
1508 iolink2 = list_first_entry(&peer->io_link_props, in kfd_add_peer_prop()
1513 return -ENOMEM; in kfd_add_peer_prop()
1517 props->weight = iolink1->weight + iolink2->weight; in kfd_add_peer_prop()
1518 props->min_latency = iolink1->min_latency + iolink2->min_latency; in kfd_add_peer_prop()
1519 props->max_latency = iolink1->max_latency + iolink2->max_latency; in kfd_add_peer_prop()
1520 props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth); in kfd_add_peer_prop()
1521 props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth); in kfd_add_peer_prop()
1523 if (iolink1->node_to != iolink2->node_to) { in kfd_add_peer_prop()
1524 /* CPU->CPU link*/ in kfd_add_peer_prop()
1525 cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); in kfd_add_peer_prop()
1527 list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { in kfd_add_peer_prop()
1528 if (iolink3->node_to != iolink2->node_to) in kfd_add_peer_prop()
1531 props->weight += iolink3->weight; in kfd_add_peer_prop()
1532 props->min_latency += iolink3->min_latency; in kfd_add_peer_prop()
1533 props->max_latency += iolink3->max_latency; in kfd_add_peer_prop()
1534 props->min_bandwidth = min(props->min_bandwidth, in kfd_add_peer_prop()
1535 iolink3->min_bandwidth); in kfd_add_peer_prop()
1536 props->max_bandwidth = min(props->max_bandwidth, in kfd_add_peer_prop()
1537 iolink3->max_bandwidth); in kfd_add_peer_prop()
1545 props->node_from = from; in kfd_add_peer_prop()
1546 props->node_to = to; in kfd_add_peer_prop()
1547 peer->node_props.p2p_links_count++; in kfd_add_peer_prop()
1548 list_add_tail(&props->list, &peer->p2p_link_props); in kfd_add_peer_prop()
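Both kfd_create_indirect_link_prop() and kfd_add_peer_prop() score a multi-hop link the same way: weights and latencies add across hops, while bandwidth is the minimum over the hops. A runnable sketch of that combination rule, with a pared-down stand-in for kfd_iolink_properties:

    #include <stdio.h>
    #include <stdint.h>

    struct demo_link {
        uint32_t weight, min_latency, max_latency;
        uint32_t min_bandwidth, max_bandwidth;
    };

    static struct demo_link combine(struct demo_link a, struct demo_link b)
    {
        struct demo_link r = {
            .weight        = a.weight + b.weight,
            .min_latency   = a.min_latency + b.min_latency,
            .max_latency   = a.max_latency + b.max_latency,
            .min_bandwidth = a.min_bandwidth < b.min_bandwidth ?
                             a.min_bandwidth : b.min_bandwidth,
            .max_bandwidth = a.max_bandwidth < b.max_bandwidth ?
                             a.max_bandwidth : b.max_bandwidth,
        };
        return r;
    }

    int main(void)
    {
        struct demo_link gpu_cpu = { 20, 10, 40, 16, 32 };  /* illustrative */
        struct demo_link cpu_cpu = { 40, 20, 80,  8, 16 };
        struct demo_link via = combine(gpu_cpu, cpu_cpu);

        printf("weight %u, bw %u-%u\n", via.weight,
               via.min_bandwidth, via.max_bandwidth);
        return 0;
    }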
1572 if (WARN_ON(!new_dev->gpu)) in kfd_dev_create_p2p_links()
1575 k--; in kfd_dev_create_p2p_links()
1577 /* create indirect links */ in kfd_dev_create_p2p_links()
1588 if (!dev->gpu || !dev->gpu->adev || in kfd_dev_create_p2p_links()
1589 (dev->gpu->kfd->hive_id && in kfd_dev_create_p2p_links()
1590 dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) in kfd_dev_create_p2p_links()
1593 /* check if the nodes are peer accessible in one or both directions */ in kfd_dev_create_p2p_links()
1623 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l1_pcache()
1627 * CU, and in case of a non-shared cache check if the CU is inactive. If in fill_in_l1_pcache()
1633 return -ENOMEM; in fill_in_l1_pcache()
1636 pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); in fill_in_l1_pcache()
1637 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l1_pcache()
1638 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l1_pcache()
1639 pcache->cacheline_size = pcache_info[cache_type].cache_line_size; in fill_in_l1_pcache()
1642 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l1_pcache()
1644 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l1_pcache()
1646 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l1_pcache()
1648 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l1_pcache()
1654 cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l1_pcache()
1656 pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l1_pcache()
1657 pcache->sibling_map[1] = in fill_in_l1_pcache()
1659 pcache->sibling_map[2] = in fill_in_l1_pcache()
1661 pcache->sibling_map[3] = in fill_in_l1_pcache()
1664 pcache->sibling_map_size = 4; in fill_in_l1_pcache()
1683 int num_xcc = NUM_XCC(knode->xcc_mask); in fill_in_l2_l3_pcache()
1686 struct amdgpu_device *adev = knode->adev; in fill_in_l2_l3_pcache()
1689 start = ffs(knode->xcc_mask) - 1; in fill_in_l2_l3_pcache()
1696 for (i = 0; i < gfx_info->max_shader_engines && !found; i++) { in fill_in_l2_l3_pcache()
1697 for (j = 0; j < gfx_info->max_sh_per_se && !found; j++) { in fill_in_l2_l3_pcache()
1698 if (cu_info->bitmap[start][i % 4][j % 4]) { in fill_in_l2_l3_pcache()
1700 cu_info->bitmap[start][i % 4][j % 4]; in fill_in_l2_l3_pcache()
1707 ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1711 * CU, and in case of a non-shared cache check if the CU is inactive. If in fill_in_l2_l3_pcache()
1717 return -ENOMEM; in fill_in_l2_l3_pcache()
1720 pcache->processor_id_low = cu_processor_id in fill_in_l2_l3_pcache()
1721 + (first_active_cu - 1); in fill_in_l2_l3_pcache()
1722 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l2_l3_pcache()
1723 pcache->cacheline_size = pcache_info[cache_type].cache_line_size; in fill_in_l2_l3_pcache()
1728 mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); in fill_in_l2_l3_pcache()
1732 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l2_l3_pcache()
1734 if (mode && pcache->cache_level == 3) in fill_in_l2_l3_pcache()
1735 pcache->cache_size /= mode; in fill_in_l2_l3_pcache()
1738 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l2_l3_pcache()
1740 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l2_l3_pcache()
1742 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l2_l3_pcache()
1744 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l2_l3_pcache()
1749 cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l2_l3_pcache()
1753 for (i = 0; i < gfx_info->max_shader_engines; i++) { in fill_in_l2_l3_pcache()
1754 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in fill_in_l2_l3_pcache()
1755 pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l2_l3_pcache()
1756 pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); in fill_in_l2_l3_pcache()
1757 pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); in fill_in_l2_l3_pcache()
1758 pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); in fill_in_l2_l3_pcache()
1761 cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4]; in fill_in_l2_l3_pcache()
1762 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1766 pcache->sibling_map_size = k; in fill_in_l2_l3_pcache()
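fill_in_l2_l3_pcache() normalizes each CU mask so the first active CU sits at bit 0 (shifting by first_active_cu - 1) and then splits the 32-bit mask into the sibling_map bytes that sysfs later prints bit by bit. The same normalization and packing as a runnable sketch, with an illustrative mask:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t cu_mask = 0x0000f0f0;       /* illustrative active CUs */
        uint8_t sibling_map[4];
        int first_active_cu = __builtin_ctz(cu_mask) + 1;  /* 1-based, as ffs() */

        cu_mask >>= (first_active_cu - 1);   /* first active CU -> bit 0 */
        for (int b = 0; b < 4; b++)
            sibling_map[b] = (uint8_t)((cu_mask >> (8 * b)) & 0xFF);

        for (int b = 0; b < 4; b++)
            printf("byte %d: 0x%02x\n", b, sibling_map[b]);
        return 0;
    }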
1775 /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
1786 struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; in kfd_fill_cache_non_crat_info()
1787 struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; in kfd_fill_cache_non_crat_info()
1795 gpu_processor_id = dev->node_props.simd_id_base; in kfd_fill_cache_non_crat_info()
1814 start = ffs(kdev->xcc_mask) - 1; in kfd_fill_cache_non_crat_info()
1815 end = start + NUM_XCC(kdev->xcc_mask); in kfd_fill_cache_non_crat_info()
1821 for (i = 0; i < gfx_info->max_shader_engines; i++) { in kfd_fill_cache_non_crat_info()
1822 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in kfd_fill_cache_non_crat_info()
1823 for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) { in kfd_fill_cache_non_crat_info()
1826 cu_info->bitmap[xcc][i % 4][j + i / 4], ct, in kfd_fill_cache_non_crat_info()
1834 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1839 gfx_info->max_cu_per_sh) ? in kfd_fill_cache_non_crat_info()
1841 (gfx_info->max_cu_per_sh - k); in kfd_fill_cache_non_crat_info()
1856 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1860 dev->node_props.caches_count += num_of_entries; in kfd_fill_cache_non_crat_info()
1877 dev_err(gpu->adev->dev, "Error creating VCRAT\n"); in kfd_topology_add_device_locked()
1878 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1888 dev_err(gpu->adev->dev, "Error parsing VCRAT\n"); in kfd_topology_add_device_locked()
1889 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1898 res = -ENODEV; in kfd_topology_add_device_locked()
1914 dev_err(gpu->adev->dev, "Failed to update GPU to sysfs topology. res=%d\n", in kfd_topology_add_device_locked()
1926 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && in kfd_topology_set_dbg_firmware_support()
1927 KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { in kfd_topology_set_dbg_firmware_support()
1928 uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1931 uint32_t mes_rev = dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1942 switch (KFD_GC_VERSION(dev->gpu)) { in kfd_topology_set_dbg_firmware_support()
1944 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768; in kfd_topology_set_dbg_firmware_support()
1951 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459; in kfd_topology_set_dbg_firmware_support()
1954 firmware_supported = dev->gpu->kfd->mec_fw_version >= 60; in kfd_topology_set_dbg_firmware_support()
1957 firmware_supported = dev->gpu->kfd->mec_fw_version >= 51; in kfd_topology_set_dbg_firmware_support()
1962 firmware_supported = dev->gpu->kfd->mec_fw_version >= 144; in kfd_topology_set_dbg_firmware_support()
1969 firmware_supported = dev->gpu->kfd->mec_fw_version >= 89; in kfd_topology_set_dbg_firmware_support()
1981 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; in kfd_topology_set_dbg_firmware_support()
1986 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << in kfd_topology_set_capabilities()
1990 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT | in kfd_topology_set_capabilities()
1994 if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) in kfd_topology_set_capabilities()
1995 dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; in kfd_topology_set_capabilities()
1997 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { in kfd_topology_set_capabilities()
1998 if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) || in kfd_topology_set_capabilities()
1999 KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4)) in kfd_topology_set_capabilities()
2000 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
2004 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
2008 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) in kfd_topology_set_capabilities()
2009 dev->node_props.capability |= in kfd_topology_set_capabilities()
2012 if (!amdgpu_sriov_vf(dev->gpu->adev)) in kfd_topology_set_capabilities()
2013 dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; in kfd_topology_set_capabilities()
2016 dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | in kfd_topology_set_capabilities()
2019 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0)) in kfd_topology_set_capabilities()
2020 dev->node_props.capability |= in kfd_topology_set_capabilities()
2033 const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; in kfd_topology_add_device()
2034 struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config; in kfd_topology_add_device()
2035 struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info; in kfd_topology_add_device()
2037 if (gpu->xcp && !gpu->xcp->ddev) { in kfd_topology_add_device()
2038 dev_warn(gpu->adev->dev, in kfd_topology_add_device()
2042 dev_dbg(gpu->adev->dev, "Adding new GPU to topology\n"); in kfd_topology_add_device()
2060 dev->gpu_id = gpu_id; in kfd_topology_add_device()
2061 gpu->id = gpu_id; in kfd_topology_add_device()
2069 /* Fill-in additional information that is not available in CRAT but in kfd_topology_add_device()
2072 for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { in kfd_topology_add_device()
2073 dev->node_props.name[i] = __tolower(asic_name[i]); in kfd_topology_add_device()
2077 dev->node_props.name[i] = '\0'; in kfd_topology_add_device()
2079 dev->node_props.simd_arrays_per_engine = in kfd_topology_add_device()
2080 gfx_info->max_sh_per_se; in kfd_topology_add_device()
2082 dev->node_props.gfx_target_version = in kfd_topology_add_device()
2083 gpu->kfd->device_info.gfx_target_version; in kfd_topology_add_device()
2084 dev->node_props.vendor_id = gpu->adev->pdev->vendor; in kfd_topology_add_device()
2085 dev->node_props.device_id = gpu->adev->pdev->device; in kfd_topology_add_device()
2086 dev->node_props.capability |= in kfd_topology_add_device()
2087 ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & in kfd_topology_add_device()
2090 dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); in kfd_topology_add_device()
2091 if (gpu->kfd->num_nodes > 1) in kfd_topology_add_device()
2092 dev->node_props.location_id |= dev->gpu->node_id; in kfd_topology_add_device()
2094 dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); in kfd_topology_add_device()
2095 dev->node_props.max_engine_clk_fcompute = in kfd_topology_add_device()
2096 amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); in kfd_topology_add_device()
2097 dev->node_props.max_engine_clk_ccompute = in kfd_topology_add_device()
2100 if (gpu->xcp) in kfd_topology_add_device()
2101 dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index; in kfd_topology_add_device()
2103 dev->node_props.drm_render_minor = in kfd_topology_add_device()
2104 gpu->kfd->shared_resources.drm_render_minor; in kfd_topology_add_device()
2106 dev->node_props.hive_id = gpu->kfd->hive_id; in kfd_topology_add_device()
2107 dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); in kfd_topology_add_device()
2108 dev->node_props.num_sdma_xgmi_engines = in kfd_topology_add_device()
2110 dev->node_props.num_sdma_queues_per_engine = in kfd_topology_add_device()
2111 gpu->kfd->device_info.num_sdma_queues_per_engine - in kfd_topology_add_device()
2112 gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; in kfd_topology_add_device()
2113 dev->node_props.num_gws = (dev->gpu->gws && in kfd_topology_add_device()
2114 dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? in kfd_topology_add_device()
2115 dev->gpu->adev->gds.gws_size : 0; in kfd_topology_add_device()
2116 dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); in kfd_topology_add_device()
2121 switch (dev->gpu->adev->asic_type) { in kfd_topology_add_device()
2125 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << in kfd_topology_add_device()
2136 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << in kfd_topology_add_device()
2141 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2143 dev->gpu->adev->asic_type); in kfd_topology_add_device()
2152 dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; in kfd_topology_add_device()
2159 if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { in kfd_topology_add_device()
2160 dev->node_props.simd_count = in kfd_topology_add_device()
2161 cu_info->simd_per_cu * cu_info->number; in kfd_topology_add_device()
2162 dev->node_props.max_waves_per_simd = 10; in kfd_topology_add_device()
2165 /* KFD only cares about SRAM ECC on GFX and HBM ECC on UMC */ in kfd_topology_add_device()
2166 dev->node_props.capability |= in kfd_topology_add_device()
2167 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? in kfd_topology_add_device()
2169 dev->node_props.capability |= in kfd_topology_add_device()
2170 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? in kfd_topology_add_device()
2173 if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2174 dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? in kfd_topology_add_device()
2177 if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) in kfd_topology_add_device()
2178 dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; in kfd_topology_add_device()
2180 if (dev->gpu->adev->gmc.is_app_apu || in kfd_topology_add_device()
2181 dev->gpu->adev->gmc.xgmi.connected_to_cpu) in kfd_topology_add_device()
2182 dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; in kfd_topology_add_device()
2194 * kfd_topology_update_io_links() - Update IO links after device removal.
2218 if (dev->proximity_domain > proximity_domain) in kfd_topology_update_io_links()
2219 dev->proximity_domain--; in kfd_topology_update_io_links()
2221 list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { in kfd_topology_update_io_links()
2226 if (iolink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2227 list_del(&iolink->list); in kfd_topology_update_io_links()
2228 dev->node_props.io_links_count--; in kfd_topology_update_io_links()
2230 if (iolink->node_from > proximity_domain) in kfd_topology_update_io_links()
2231 iolink->node_from--; in kfd_topology_update_io_links()
2232 if (iolink->node_to > proximity_domain) in kfd_topology_update_io_links()
2233 iolink->node_to--; in kfd_topology_update_io_links()
2237 list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) { in kfd_topology_update_io_links()
2242 if (p2plink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2243 list_del(&p2plink->list); in kfd_topology_update_io_links()
2244 dev->node_props.p2p_links_count--; in kfd_topology_update_io_links()
2246 if (p2plink->node_from > proximity_domain) in kfd_topology_update_io_links()
2247 p2plink->node_from--; in kfd_topology_update_io_links()
2248 if (p2plink->node_to > proximity_domain) in kfd_topology_update_io_links()
2249 p2plink->node_to--; in kfd_topology_update_io_links()
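kfd_topology_update_io_links() keeps proximity domains dense after a removal: links that targeted the removed node are deleted, and every node_from/node_to index above it is decremented. A compact runnable sketch of that renumbering over a plain array of link targets:

    #include <stdio.h>

    int main(void)
    {
        int node_to[] = { 0, 2, 3, 1 };   /* link targets, illustrative */
        int n = 4, removed = 1, kept = 0;

        for (int i = 0; i < n; i++) {
            if (node_to[i] == removed)
                continue;                          /* delete the link */
            node_to[kept++] = node_to[i] > removed ?
                              node_to[i] - 1 : node_to[i];
        }

        for (int i = 0; i < kept; i++)
            printf("link -> %d\n", node_to[i]);
        return 0;
    }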
2259 int res = -ENODEV; in kfd_topology_remove_device()
2265 if (dev->gpu == gpu) { in kfd_topology_remove_device()
2266 gpu_id = dev->gpu_id; in kfd_topology_remove_device()
2269 sys_props.num_devices--; in kfd_topology_remove_device()
2271 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_remove_device()
2289 /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
2292 * Return - 0: On success (@kdev will be NULL for non-GPU nodes)
2293 * -1: If end of list
2306 *kdev = top_dev->gpu; in kfd_topology_enum_kfd_devices()
2316 return -1; in kfd_topology_enum_kfd_devices()
2325 return -1; in kfd_cpumask_to_apic_id()
2328 return -1; in kfd_cpumask_to_apic_id()
2336 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
2338 * Return -1 on failure
2342 if (numa_node_id == -1) { in kfd_numa_node_to_apic_id()
2360 if (!dev->gpu) { in kfd_debugfs_hqds_by_device()
2365 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_hqds_by_device()
2366 r = dqm_debugfs_hqds(m, dev->gpu->dqm); in kfd_debugfs_hqds_by_device()
2385 if (!dev->gpu) { in kfd_debugfs_rls_by_device()
2390 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_rls_by_device()
2391 r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr); in kfd_debugfs_rls_by_device()