Lines Matching +full:gpu +full:- +full:id
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
46 /* topology_device_list - Master list of all topology devices */
60 if (top_dev->proximity_domain == proximity_domain) { in kfd_topology_device_by_proximity_domain_no_lock()
90 if (top_dev->gpu_id == gpu_id) { in kfd_topology_device_by_id()
108 return top_dev->gpu; in kfd_device_by_id()
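These helpers all walk the same master list; a minimal sketch of the by-gpu_id lookup, assuming a topology_lock rwsem (not shown in this listing) protects topology_device_list:

    struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
    {
            struct kfd_topology_device *top_dev;
            struct kfd_topology_device *ret = NULL;

            down_read(&topology_lock);

            /* Linear scan of the master list for a matching gpu_id */
            list_for_each_entry(top_dev, &topology_device_list, list)
                    if (top_dev->gpu_id == gpu_id) {
                            ret = top_dev;
                            break;
                    }

            up_read(&topology_lock);
            return ret;
    }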
120 list_del(&dev->list); in kfd_release_topology_device()
122 while (dev->mem_props.next != &dev->mem_props) { in kfd_release_topology_device()
123 mem = container_of(dev->mem_props.next, in kfd_release_topology_device()
125 list_del(&mem->list); in kfd_release_topology_device()
129 while (dev->cache_props.next != &dev->cache_props) { in kfd_release_topology_device()
130 cache = container_of(dev->cache_props.next, in kfd_release_topology_device()
132 list_del(&cache->list); in kfd_release_topology_device()
136 while (dev->io_link_props.next != &dev->io_link_props) { in kfd_release_topology_device()
137 iolink = container_of(dev->io_link_props.next, in kfd_release_topology_device()
139 list_del(&iolink->list); in kfd_release_topology_device()
143 while (dev->p2p_link_props.next != &dev->p2p_link_props) { in kfd_release_topology_device()
144 p2plink = container_of(dev->p2p_link_props.next, in kfd_release_topology_device()
146 list_del(&p2plink->list); in kfd_release_topology_device()
150 while (dev->perf_props.next != &dev->perf_props) { in kfd_release_topology_device()
151 perf = container_of(dev->perf_props.next, in kfd_release_topology_device()
153 list_del(&perf->list); in kfd_release_topology_device()
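Each of these while loops drains one property list by repeatedly detaching the head element until the list points back at itself. An equivalent sketch of the pattern for a single list (the kfree() calls sit on lines not matched by this search):

    /* Free every kfd_mem_properties entry hanging off dev->mem_props */
    while (!list_empty(&dev->mem_props)) {
            struct kfd_mem_properties *mem =
                    list_first_entry(&dev->mem_props,
                                     struct kfd_mem_properties, list);

            list_del(&mem->list);   /* detach from the per-device list */
            kfree(mem);             /* release the property entry */
    }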
188 INIT_LIST_HEAD(&dev->mem_props); in kfd_create_topology_device()
189 INIT_LIST_HEAD(&dev->cache_props); in kfd_create_topology_device()
190 INIT_LIST_HEAD(&dev->io_link_props); in kfd_create_topology_device()
191 INIT_LIST_HEAD(&dev->p2p_link_props); in kfd_create_topology_device()
192 INIT_LIST_HEAD(&dev->perf_props); in kfd_create_topology_device()
194 list_add_tail(&dev->list, device_list); in kfd_create_topology_device()
201 (offs += snprintf(buffer+offs, PAGE_SIZE-offs, \
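This fragment is the core of the sysfs_show_* helpers used by every show callback in the file: each call appends one "name value" line into the page-sized sysfs buffer and advances offs. Reconstructed roughly from the call sites (treat the exact macro shapes as assumptions):

    #define sysfs_show_gen_prop(buffer, offs, fmt, ...)                \
            (offs += snprintf(buffer+offs, PAGE_SIZE-offs,             \
                              fmt, __VA_ARGS__))
    #define sysfs_show_32bit_prop(buffer, offs, name, value) \
            sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
    #define sysfs_show_64bit_prop(buffer, offs, name, value) \
            sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
    #define sysfs_show_32bit_val(buffer, offs, value) \
            sysfs_show_gen_prop(buffer, offs, "%u\n", value)
    #define sysfs_show_str_val(buffer, offs, value) \
            sysfs_show_gen_prop(buffer, offs, "%s\n", value)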
231 offs = -EINVAL; in sysprops_show()
261 if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) in iolink_show()
262 return -EPERM; in iolink_show()
263 sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type); in iolink_show()
264 sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj); in iolink_show()
265 sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min); in iolink_show()
266 sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from); in iolink_show()
267 sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to); in iolink_show()
268 sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight); in iolink_show()
269 sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency); in iolink_show()
270 sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency); in iolink_show()
272 iolink->min_bandwidth); in iolink_show()
274 iolink->max_bandwidth); in iolink_show()
276 iolink->rec_transfer_size); in iolink_show()
278 iolink->rec_sdma_eng_id_mask); in iolink_show()
279 sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); in iolink_show()
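iolink_show() (and mem_show()/kfd_cache_show() below) all follow the same shape: recover the containing properties structure from the raw attribute with container_of(), apply the device-cgroup permission check, emit the name/value pairs, and return the byte count. A minimal sketch with the property list abbreviated:

    static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
                               char *buffer)
    {
            int offs = 0;
            struct kfd_iolink_properties *iolink;

            /* The "properties" attribute is embedded in the iolink struct */
            iolink = container_of(attr, struct kfd_iolink_properties, attr);
            if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
                    return -EPERM;

            sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type);
            /* ... the remaining properties listed above ... */
            sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);

            return offs;
    }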
303 if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) in mem_show()
304 return -EPERM; in mem_show()
305 sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type); in mem_show()
307 mem->size_in_bytes); in mem_show()
308 sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags); in mem_show()
309 sysfs_show_32bit_prop(buffer, offs, "width", mem->width); in mem_show()
311 mem->mem_clk_max); in mem_show()
335 if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) in kfd_cache_show()
336 return -EPERM; in kfd_cache_show()
338 cache->processor_id_low); in kfd_cache_show()
339 sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level); in kfd_cache_show()
340 sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size); in kfd_cache_show()
342 cache->cacheline_size); in kfd_cache_show()
344 cache->cachelines_per_tag); in kfd_cache_show()
345 sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); in kfd_cache_show()
346 sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); in kfd_cache_show()
347 sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); in kfd_cache_show()
349 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); in kfd_cache_show()
350 for (i = 0; i < cache->sibling_map_size; i++) in kfd_cache_show()
351 for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) in kfd_cache_show()
353 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", in kfd_cache_show()
354 (cache->sibling_map[i] >> j) & 1); in kfd_cache_show()
357 buffer[offs-1] = '\n'; in kfd_cache_show()
385 if (!attr->data) /* invalid data for PMC */ in perf_show()
388 return sysfs_show_32bit_val(buf, offs, attr->data); in perf_show()
414 if (strcmp(attr->name, "gpu_id") == 0) { in node_show()
417 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
418 return -EPERM; in node_show()
419 return sysfs_show_32bit_val(buffer, offs, dev->gpu_id); in node_show()
422 if (strcmp(attr->name, "name") == 0) { in node_show()
426 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
427 return -EPERM; in node_show()
428 return sysfs_show_str_val(buffer, offs, dev->node_props.name); in node_show()
433 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
434 return -EPERM; in node_show()
436 dev->node_props.cpu_cores_count); in node_show()
438 dev->gpu ? dev->node_props.simd_count : 0); in node_show()
440 dev->node_props.mem_banks_count); in node_show()
442 dev->node_props.caches_count); in node_show()
444 dev->node_props.io_links_count); in node_show()
446 dev->node_props.p2p_links_count); in node_show()
448 dev->node_props.cpu_core_id_base); in node_show()
450 dev->node_props.simd_id_base); in node_show()
452 dev->node_props.max_waves_per_simd); in node_show()
454 dev->node_props.lds_size_in_kb); in node_show()
456 dev->node_props.gds_size_in_kb); in node_show()
458 dev->node_props.num_gws); in node_show()
460 dev->node_props.wave_front_size); in node_show()
462 dev->gpu ? (dev->node_props.array_count * in node_show()
463 NUM_XCC(dev->gpu->xcc_mask)) : 0); in node_show()
465 dev->node_props.simd_arrays_per_engine); in node_show()
467 dev->node_props.cu_per_simd_array); in node_show()
469 dev->node_props.simd_per_cu); in node_show()
471 dev->node_props.max_slots_scratch_cu); in node_show()
473 dev->node_props.gfx_target_version); in node_show()
475 dev->node_props.vendor_id); in node_show()
477 dev->node_props.device_id); in node_show()
479 dev->node_props.location_id); in node_show()
481 dev->node_props.domain); in node_show()
483 dev->node_props.drm_render_minor); in node_show()
485 dev->node_props.hive_id); in node_show()
487 dev->node_props.num_sdma_engines); in node_show()
489 dev->node_props.num_sdma_xgmi_engines); in node_show()
491 dev->node_props.num_sdma_queues_per_engine); in node_show()
493 dev->node_props.num_cp_queues); in node_show()
495 if (dev->gpu) { in node_show()
497 __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); in node_show()
500 dev->node_props.capability |= in node_show()
503 dev->node_props.capability |= in node_show()
509 if (dev->gpu->adev->asic_type == CHIP_TONGA) in node_show()
510 dev->node_props.capability |= in node_show()
513 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) && in node_show()
514 (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) in node_show()
515 dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; in node_show()
518 dev->node_props.max_engine_clk_fcompute); in node_show()
523 dev->gpu->kfd->mec_fw_version); in node_show()
525 dev->node_props.capability); in node_show()
527 dev->node_props.capability2); in node_show()
529 dev->node_props.debug_prop); in node_show()
531 dev->gpu->kfd->sdma_fw_version); in node_show()
533 dev->gpu->xcp ? in node_show()
534 dev->gpu->xcp->unique_id : in node_show()
535 dev->gpu->adev->unique_id); in node_show()
537 NUM_XCC(dev->gpu->xcc_mask)); in node_show()
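Like the other show callbacks here, node_show() is not wired up as a device attribute: gpu_id, name and properties are bare struct attribute members dispatched through a custom sysfs_ops on the node kobject's ktype (node_type itself is referenced at line 657 below; the ops and release identifiers in this sketch are reconstructions):

    static const struct sysfs_ops node_ops = {
            .show = node_show,      /* reads of gpu_id, name and properties land here */
    };

    static const struct kobj_type node_type = {
            .release = kfd_topology_kobj_release,
            .sysfs_ops = &node_ops,
    };

node_show() then distinguishes the three files by attr->name ("gpu_id", "name") and otherwise treats the attribute as the node's embedded attr_props, exactly as the strcmp() branches above show.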
568 if (dev->kobj_iolink) { in kfd_remove_sysfs_node_entry()
569 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_remove_sysfs_node_entry()
570 if (iolink->kobj) { in kfd_remove_sysfs_node_entry()
571 kfd_remove_sysfs_file(iolink->kobj, in kfd_remove_sysfs_node_entry()
572 &iolink->attr); in kfd_remove_sysfs_node_entry()
573 iolink->kobj = NULL; in kfd_remove_sysfs_node_entry()
575 kobject_del(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
576 kobject_put(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
577 dev->kobj_iolink = NULL; in kfd_remove_sysfs_node_entry()
580 if (dev->kobj_p2plink) { in kfd_remove_sysfs_node_entry()
581 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_remove_sysfs_node_entry()
582 if (p2plink->kobj) { in kfd_remove_sysfs_node_entry()
583 kfd_remove_sysfs_file(p2plink->kobj, in kfd_remove_sysfs_node_entry()
584 &p2plink->attr); in kfd_remove_sysfs_node_entry()
585 p2plink->kobj = NULL; in kfd_remove_sysfs_node_entry()
587 kobject_del(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
588 kobject_put(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
589 dev->kobj_p2plink = NULL; in kfd_remove_sysfs_node_entry()
592 if (dev->kobj_cache) { in kfd_remove_sysfs_node_entry()
593 list_for_each_entry(cache, &dev->cache_props, list) in kfd_remove_sysfs_node_entry()
594 if (cache->kobj) { in kfd_remove_sysfs_node_entry()
595 kfd_remove_sysfs_file(cache->kobj, in kfd_remove_sysfs_node_entry()
596 &cache->attr); in kfd_remove_sysfs_node_entry()
597 cache->kobj = NULL; in kfd_remove_sysfs_node_entry()
599 kobject_del(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
600 kobject_put(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
601 dev->kobj_cache = NULL; in kfd_remove_sysfs_node_entry()
604 if (dev->kobj_mem) { in kfd_remove_sysfs_node_entry()
605 list_for_each_entry(mem, &dev->mem_props, list) in kfd_remove_sysfs_node_entry()
606 if (mem->kobj) { in kfd_remove_sysfs_node_entry()
607 kfd_remove_sysfs_file(mem->kobj, &mem->attr); in kfd_remove_sysfs_node_entry()
608 mem->kobj = NULL; in kfd_remove_sysfs_node_entry()
610 kobject_del(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
611 kobject_put(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
612 dev->kobj_mem = NULL; in kfd_remove_sysfs_node_entry()
615 if (dev->kobj_perf) { in kfd_remove_sysfs_node_entry()
616 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_remove_sysfs_node_entry()
617 kfree(perf->attr_group); in kfd_remove_sysfs_node_entry()
618 perf->attr_group = NULL; in kfd_remove_sysfs_node_entry()
620 kobject_del(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
621 kobject_put(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
622 dev->kobj_perf = NULL; in kfd_remove_sysfs_node_entry()
625 if (dev->kobj_node) { in kfd_remove_sysfs_node_entry()
626 sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); in kfd_remove_sysfs_node_entry()
627 sysfs_remove_file(dev->kobj_node, &dev->attr_name); in kfd_remove_sysfs_node_entry()
628 sysfs_remove_file(dev->kobj_node, &dev->attr_props); in kfd_remove_sysfs_node_entry()
629 kobject_del(dev->kobj_node); in kfd_remove_sysfs_node_entry()
630 kobject_put(dev->kobj_node); in kfd_remove_sysfs_node_entry()
631 dev->kobj_node = NULL; in kfd_remove_sysfs_node_entry()
636 uint32_t id) in kfd_build_sysfs_node_entry() argument
647 if (WARN_ON(dev->kobj_node)) in kfd_build_sysfs_node_entry()
648 return -EEXIST; in kfd_build_sysfs_node_entry()
653 dev->kobj_node = kfd_alloc_struct(dev->kobj_node); in kfd_build_sysfs_node_entry()
654 if (!dev->kobj_node) in kfd_build_sysfs_node_entry()
655 return -ENOMEM; in kfd_build_sysfs_node_entry()
657 ret = kobject_init_and_add(dev->kobj_node, &node_type, in kfd_build_sysfs_node_entry()
658 sys_props.kobj_nodes, "%d", id); in kfd_build_sysfs_node_entry()
660 kobject_put(dev->kobj_node); in kfd_build_sysfs_node_entry()
664 dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); in kfd_build_sysfs_node_entry()
665 if (!dev->kobj_mem) in kfd_build_sysfs_node_entry()
666 return -ENOMEM; in kfd_build_sysfs_node_entry()
668 dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); in kfd_build_sysfs_node_entry()
669 if (!dev->kobj_cache) in kfd_build_sysfs_node_entry()
670 return -ENOMEM; in kfd_build_sysfs_node_entry()
672 dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
673 if (!dev->kobj_iolink) in kfd_build_sysfs_node_entry()
674 return -ENOMEM; in kfd_build_sysfs_node_entry()
676 dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
677 if (!dev->kobj_p2plink) in kfd_build_sysfs_node_entry()
678 return -ENOMEM; in kfd_build_sysfs_node_entry()
680 dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); in kfd_build_sysfs_node_entry()
681 if (!dev->kobj_perf) in kfd_build_sysfs_node_entry()
682 return -ENOMEM; in kfd_build_sysfs_node_entry()
687 dev->attr_gpuid.name = "gpu_id"; in kfd_build_sysfs_node_entry()
688 dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
689 sysfs_attr_init(&dev->attr_gpuid); in kfd_build_sysfs_node_entry()
690 dev->attr_name.name = "name"; in kfd_build_sysfs_node_entry()
691 dev->attr_name.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
692 sysfs_attr_init(&dev->attr_name); in kfd_build_sysfs_node_entry()
693 dev->attr_props.name = "properties"; in kfd_build_sysfs_node_entry()
694 dev->attr_props.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
695 sysfs_attr_init(&dev->attr_props); in kfd_build_sysfs_node_entry()
696 ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); in kfd_build_sysfs_node_entry()
699 ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); in kfd_build_sysfs_node_entry()
702 ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); in kfd_build_sysfs_node_entry()
707 list_for_each_entry(mem, &dev->mem_props, list) { in kfd_build_sysfs_node_entry()
708 mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
709 if (!mem->kobj) in kfd_build_sysfs_node_entry()
710 return -ENOMEM; in kfd_build_sysfs_node_entry()
711 ret = kobject_init_and_add(mem->kobj, &mem_type, in kfd_build_sysfs_node_entry()
712 dev->kobj_mem, "%d", i); in kfd_build_sysfs_node_entry()
714 kobject_put(mem->kobj); in kfd_build_sysfs_node_entry()
718 mem->attr.name = "properties"; in kfd_build_sysfs_node_entry()
719 mem->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
720 sysfs_attr_init(&mem->attr); in kfd_build_sysfs_node_entry()
721 ret = sysfs_create_file(mem->kobj, &mem->attr); in kfd_build_sysfs_node_entry()
728 list_for_each_entry(cache, &dev->cache_props, list) { in kfd_build_sysfs_node_entry()
729 cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
730 if (!cache->kobj) in kfd_build_sysfs_node_entry()
731 return -ENOMEM; in kfd_build_sysfs_node_entry()
732 ret = kobject_init_and_add(cache->kobj, &cache_type, in kfd_build_sysfs_node_entry()
733 dev->kobj_cache, "%d", i); in kfd_build_sysfs_node_entry()
735 kobject_put(cache->kobj); in kfd_build_sysfs_node_entry()
739 cache->attr.name = "properties"; in kfd_build_sysfs_node_entry()
740 cache->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
741 sysfs_attr_init(&cache->attr); in kfd_build_sysfs_node_entry()
742 ret = sysfs_create_file(cache->kobj, &cache->attr); in kfd_build_sysfs_node_entry()
749 list_for_each_entry(iolink, &dev->io_link_props, list) { in kfd_build_sysfs_node_entry()
750 iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
751 if (!iolink->kobj) in kfd_build_sysfs_node_entry()
752 return -ENOMEM; in kfd_build_sysfs_node_entry()
753 ret = kobject_init_and_add(iolink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
754 dev->kobj_iolink, "%d", i); in kfd_build_sysfs_node_entry()
756 kobject_put(iolink->kobj); in kfd_build_sysfs_node_entry()
760 iolink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
761 iolink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
762 sysfs_attr_init(&iolink->attr); in kfd_build_sysfs_node_entry()
763 ret = sysfs_create_file(iolink->kobj, &iolink->attr); in kfd_build_sysfs_node_entry()
770 list_for_each_entry(p2plink, &dev->p2p_link_props, list) { in kfd_build_sysfs_node_entry()
771 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
772 if (!p2plink->kobj) in kfd_build_sysfs_node_entry()
773 return -ENOMEM; in kfd_build_sysfs_node_entry()
774 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
775 dev->kobj_p2plink, "%d", i); in kfd_build_sysfs_node_entry()
777 kobject_put(p2plink->kobj); in kfd_build_sysfs_node_entry()
781 p2plink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
782 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
783 sysfs_attr_init(&p2plink->attr); in kfd_build_sysfs_node_entry()
784 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_sysfs_node_entry()
792 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_build_sysfs_node_entry()
793 perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) in kfd_build_sysfs_node_entry()
796 if (!perf->attr_group) in kfd_build_sysfs_node_entry()
797 return -ENOMEM; in kfd_build_sysfs_node_entry()
799 attrs = (struct attribute **)(perf->attr_group + 1); in kfd_build_sysfs_node_entry()
800 if (!strcmp(perf->block_name, "iommu")) { in kfd_build_sysfs_node_entry()
805 perf_attr_iommu[0].data = perf->max_concurrent; in kfd_build_sysfs_node_entry()
809 perf->attr_group->name = perf->block_name; in kfd_build_sysfs_node_entry()
810 perf->attr_group->attrs = attrs; in kfd_build_sysfs_node_entry()
811 ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); in kfd_build_sysfs_node_entry()
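Taken together, kfd_build_sysfs_node_entry() builds one sysfs directory per topology node; the layout below follows directly from the kobject_create_and_add() names and attribute setup above (where the nodes/ tree itself is mounted is outside this listing):

    nodes/<id>/
        gpu_id
        name
        properties
        mem_banks/<i>/properties
        caches/<i>/properties
        io_links/<i>/properties
        p2p_links/<i>/properties
        perf/<block_name>/...      (one attribute group per perf block, e.g. "iommu")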
853 return -ENOMEM; in kfd_topology_update_sysfs()
856 &sysprops_type, &kfd_device->kobj, in kfd_topology_update_sysfs()
866 return -ENOMEM; in kfd_topology_update_sysfs()
914 list_move_tail(temp_list->next, master_list); in kfd_topology_update_device_list()
928 if (dev->node_props.cpu_cores_count && in kfd_debug_print_topology()
929 dev->node_props.simd_count) { in kfd_debug_print_topology()
931 dev->node_props.device_id, in kfd_debug_print_topology()
932 dev->node_props.vendor_id); in kfd_debug_print_topology()
933 } else if (dev->node_props.cpu_cores_count) in kfd_debug_print_topology()
935 else if (dev->node_props.simd_count) in kfd_debug_print_topology()
937 dev->node_props.device_id, in kfd_debug_print_topology()
938 dev->node_props.vendor_id); in kfd_debug_print_topology()
954 sys_props.platform_id = dev->oem_id64; in kfd_update_system_properties()
955 sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); in kfd_update_system_properties()
956 sys_props.platform_rev = dev->oem_revision; in kfd_update_system_properties()
968 if (memdev->header.type != DMI_ENTRY_MEM_DEVICE) in find_system_memory()
970 if (memdev->header.length < sizeof(struct dmi_mem_device)) in find_system_memory()
973 list_for_each_entry(mem, &kdev->mem_props, list) { in find_system_memory()
974 if (memdev->total_width != 0xFFFF && memdev->total_width != 0) in find_system_memory()
975 mem->width = memdev->total_width; in find_system_memory()
976 if (memdev->speed != 0) in find_system_memory()
977 mem->mem_clk_max = memdev->speed; in find_system_memory()
981 /* kfd_add_non_crat_information - Add information that is not currently
983 * @dev - topology device to which additional info is added

988 if (!kdev->gpu) { in kfd_add_non_crat_information()
992 /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ in kfd_add_non_crat_information()
1005 /* topology_device_list - Master list of all topology devices in kfd_topology_init()
1006 * temp_topology_device_list - temporary list created while parsing CRAT in kfd_topology_init()
1048 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_init()
1059 /* For nodes with GPU, this information gets added in kfd_topology_init()
1060 * when GPU is detected (kfd_topology_add_device). in kfd_topology_init()
1084 static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) in kfd_generate_gpu_id() argument
1093 if (!gpu) in kfd_generate_gpu_id()
1097 local_mem_size = gpu->local_mem_info.local_mem_size_private + in kfd_generate_gpu_id()
1098 gpu->local_mem_info.local_mem_size_public; in kfd_generate_gpu_id()
1099 buf[0] = gpu->adev->pdev->devfn; in kfd_generate_gpu_id()
1100 buf[1] = gpu->adev->pdev->subsystem_vendor | in kfd_generate_gpu_id()
1101 (gpu->adev->pdev->subsystem_device << 16); in kfd_generate_gpu_id()
1102 buf[2] = pci_domain_nr(gpu->adev->pdev->bus); in kfd_generate_gpu_id()
1103 buf[3] = gpu->adev->pdev->device; in kfd_generate_gpu_id()
1104 buf[4] = gpu->adev->pdev->bus->number; in kfd_generate_gpu_id()
1107 buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); in kfd_generate_gpu_id()
1110 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); in kfd_generate_gpu_id()
1114 * that the value could be 0 or non-unique. So, check if in kfd_generate_gpu_id()
1115 * it is unique and non-zero. If not unique increment till in kfd_generate_gpu_id()
1125 if (dev->gpu && dev->gpu_id == gpu_id) { in kfd_generate_gpu_id()
1132 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); in kfd_generate_gpu_id()
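The comment above states the intent: the hashed gpu_id may be zero or may collide with an existing device, so it is checked against every GPU already in the topology and bumped within the hash-width mask until it is both non-zero and unique. A sketch of that retry loop, reconstructed around the check visible at line 1125:

    bool ok;
    struct kfd_topology_device *dev;

    do {
            ok = (gpu_id != 0);
            list_for_each_entry(dev, &topology_device_list, list)
                    if (dev->gpu && dev->gpu_id == gpu_id) {
                            ok = false;     /* collision with an existing GPU */
                            break;
                    }
            if (!ok)        /* zero or taken: try the next value in range */
                    gpu_id = (gpu_id + 1) &
                             ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
    } while (!ok);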
1138 /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1139 * the GPU device is not already present in the topology device
1141 * be created for this GPU.
1143 static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu) in kfd_assign_gpu() argument
1156 if (dev->node_props.cpu_cores_count) in kfd_assign_gpu()
1159 if (!dev->gpu && (dev->node_props.simd_count > 0)) { in kfd_assign_gpu()
1160 dev->gpu = gpu; in kfd_assign_gpu()
1163 list_for_each_entry(mem, &dev->mem_props, list) in kfd_assign_gpu()
1164 mem->gpu = dev->gpu; in kfd_assign_gpu()
1165 list_for_each_entry(cache, &dev->cache_props, list) in kfd_assign_gpu()
1166 cache->gpu = dev->gpu; in kfd_assign_gpu()
1167 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_assign_gpu()
1168 iolink->gpu = dev->gpu; in kfd_assign_gpu()
1169 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_assign_gpu()
1170 p2plink->gpu = dev->gpu; in kfd_assign_gpu()
1181 * of the GPU in kfd_notify_gpu_change()
1185 /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1198 * for dGPUs - VCRAT reports only one bank of Local Memory in kfd_fill_mem_clk_max_info()
1199 * for APUs - If CRAT from ACPI reports more than one bank, then in kfd_fill_mem_clk_max_info()
1202 amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, in kfd_fill_mem_clk_max_info()
1203 dev->gpu->xcp); in kfd_fill_mem_clk_max_info()
1205 list_for_each_entry(mem, &dev->mem_props, list) in kfd_fill_mem_clk_max_info()
1206 mem->mem_clk_max = local_mem_info.mem_clk_max; in kfd_fill_mem_clk_max_info()
1214 if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) in kfd_set_iolink_no_atomics()
1221 pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev, in kfd_set_iolink_no_atomics()
1226 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
1228 /* set gpu (dev) flags. */ in kfd_set_iolink_no_atomics()
1230 if (!dev->gpu->kfd->pci_atomic_requested || in kfd_set_iolink_no_atomics()
1231 dev->gpu->adev->asic_type == CHIP_HAWAII) in kfd_set_iolink_no_atomics()
1232 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
1241 /* CPU -> GPU with PCIe */ in kfd_set_iolink_non_coherent()
1242 if (!to_dev->gpu && in kfd_set_iolink_non_coherent()
1243 inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) in kfd_set_iolink_non_coherent()
1244 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1246 if (to_dev->gpu) { in kfd_set_iolink_non_coherent()
1247 /* GPU <-> GPU with PCIe and in kfd_set_iolink_non_coherent()
1250 if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || in kfd_set_iolink_non_coherent()
1251 (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && in kfd_set_iolink_non_coherent()
1252 KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { in kfd_set_iolink_non_coherent()
1253 outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1254 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1261 { -1, 14, 12, 2, 4, 8, 10, 6 },
1262 { 14, -1, 2, 10, 8, 4, 6, 12 },
1263 { 10, 2, -1, 12, 14, 6, 4, 8 },
1264 { 2, 12, 10, -1, 6, 14, 8, 4 },
1265 { 4, 8, 14, 6, -1, 10, 12, 2 },
1266 { 8, 4, 6, 14, 12, -1, 2, 10 },
1267 { 10, 6, 4, 8, 12, 2, -1, 14 },
1268 { 6, 12, 8, 4, 2, 10, 14, -1 }};
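The 8x8 table above is indexed by source and destination XGMI socket ID: each entry is the SDMA engine recommended for copies between that pair of sockets, with -1 on the diagonal for the meaningless self-to-self case. kfd_set_recommended_sdma_engines() below turns an entry into a one-hot rec_sdma_eng_id_mask, conceptually like this (the table identifier is declared on the unshown line 1260, so the name here is an assumption):

    /* Recommended engine for src -> dst, expressed as a one-hot mask */
    int eng = xgmi_sdma_eng_map[src_socket_id][dst_socket_id];

    if (eng >= 0)
            outbound_link->rec_sdma_eng_id_mask = 1 << eng;

Lines 1299-1302 then fall back to the full xgmi_sdma_eng_id_mask whenever the recommended bit would land on a non-XGMI SDMA engine.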
1274 struct kfd_node *gpu = outbound_link->gpu; in kfd_set_recommended_sdma_engines() local
1275 struct amdgpu_device *adev = gpu->adev; in kfd_set_recommended_sdma_engines()
1276 unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; in kfd_set_recommended_sdma_engines()
1277 unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu); in kfd_set_recommended_sdma_engines()
1278 unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu); in kfd_set_recommended_sdma_engines()
1279 uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1; in kfd_set_recommended_sdma_engines()
1281 ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines; in kfd_set_recommended_sdma_engines()
1283 bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && in kfd_set_recommended_sdma_engines()
1284 adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && in kfd_set_recommended_sdma_engines()
1285 num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) && in kfd_set_recommended_sdma_engines()
1289 int src_socket_id = adev->gmc.xgmi.physical_node_id; in kfd_set_recommended_sdma_engines()
1290 int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; in kfd_set_recommended_sdma_engines()
1293 outbound_link->rec_sdma_eng_id_mask = in kfd_set_recommended_sdma_engines()
1295 inbound_link->rec_sdma_eng_id_mask = in kfd_set_recommended_sdma_engines()
1299 if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) in kfd_set_recommended_sdma_engines()
1300 outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; in kfd_set_recommended_sdma_engines()
1301 if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) in kfd_set_recommended_sdma_engines()
1302 inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; in kfd_set_recommended_sdma_engines()
1305 uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && in kfd_set_recommended_sdma_engines()
1306 num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask : in kfd_set_recommended_sdma_engines()
1309 outbound_link->rec_sdma_eng_id_mask = engine_mask; in kfd_set_recommended_sdma_engines()
1310 inbound_link->rec_sdma_eng_id_mask = engine_mask; in kfd_set_recommended_sdma_engines()
1319 if (!dev || !dev->gpu) in kfd_fill_iolink_non_crat_info()
1322 /* GPU only creates direct links so apply flags setting to all */ in kfd_fill_iolink_non_crat_info()
1323 list_for_each_entry(link, &dev->io_link_props, list) { in kfd_fill_iolink_non_crat_info()
1324 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1327 link->node_to); in kfd_fill_iolink_non_crat_info()
1332 /* Include the CPU peer in GPU hive if connected over xGMI. */ in kfd_fill_iolink_non_crat_info()
1333 if (!peer_dev->gpu && in kfd_fill_iolink_non_crat_info()
1334 link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { in kfd_fill_iolink_non_crat_info()
1336 * If the GPU is not part of a GPU hive, use its pci in kfd_fill_iolink_non_crat_info()
1337 * device location as the hive ID to bind with the CPU. in kfd_fill_iolink_non_crat_info()
1339 if (!dev->node_props.hive_id) in kfd_fill_iolink_non_crat_info()
1340 dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev); in kfd_fill_iolink_non_crat_info()
1341 peer_dev->node_props.hive_id = dev->node_props.hive_id; in kfd_fill_iolink_non_crat_info()
1344 list_for_each_entry(inbound_link, &peer_dev->io_link_props, in kfd_fill_iolink_non_crat_info()
1346 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1349 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1357 list_for_each_entry(link, &dev->p2p_link_props, list) { in kfd_fill_iolink_non_crat_info()
1358 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1361 link->node_to); in kfd_fill_iolink_non_crat_info()
1366 list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, in kfd_fill_iolink_non_crat_info()
1368 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1371 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1383 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_p2p_node_entry()
1384 if (!p2plink->kobj) in kfd_build_p2p_node_entry()
1385 return -ENOMEM; in kfd_build_p2p_node_entry()
1387 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_p2p_node_entry()
1388 dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1); in kfd_build_p2p_node_entry()
1390 kobject_put(p2plink->kobj); in kfd_build_p2p_node_entry()
1394 p2plink->attr.name = "properties"; in kfd_build_p2p_node_entry()
1395 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_p2p_node_entry()
1396 sysfs_attr_init(&p2plink->attr); in kfd_build_p2p_node_entry()
1397 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_p2p_node_entry()
1414 if (cpu_dev->gpu) in kfd_create_indirect_link_prop()
1419 if (list_empty(&kdev->io_link_props)) in kfd_create_indirect_link_prop()
1420 return -ENODATA; in kfd_create_indirect_link_prop()
1422 gpu_link = list_first_entry(&kdev->io_link_props, in kfd_create_indirect_link_prop()
1426 /* CPU <--> GPU */ in kfd_create_indirect_link_prop()
1427 if (gpu_link->node_to == i) in kfd_create_indirect_link_prop()
1430 /* find CPU <--> CPU links */ in kfd_create_indirect_link_prop()
1435 &cpu_dev->io_link_props, list) { in kfd_create_indirect_link_prop()
1436 if (tmp_link->node_to == gpu_link->node_to) { in kfd_create_indirect_link_prop()
1444 return -ENOMEM; in kfd_create_indirect_link_prop()
1446 /* CPU <--> CPU <--> GPU, GPU node*/ in kfd_create_indirect_link_prop()
1449 return -ENOMEM; in kfd_create_indirect_link_prop()
1452 props->weight = gpu_link->weight + cpu_link->weight; in kfd_create_indirect_link_prop()
1453 props->min_latency = gpu_link->min_latency + cpu_link->min_latency; in kfd_create_indirect_link_prop()
1454 props->max_latency = gpu_link->max_latency + cpu_link->max_latency; in kfd_create_indirect_link_prop()
1455 props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth); in kfd_create_indirect_link_prop()
1456 props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth); in kfd_create_indirect_link_prop()
1458 props->node_from = gpu_node; in kfd_create_indirect_link_prop()
1459 props->node_to = i; in kfd_create_indirect_link_prop()
1460 kdev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1461 list_add_tail(&props->list, &kdev->p2p_link_props); in kfd_create_indirect_link_prop()
1466 /* for small BAR, no CPU --> GPU indirect links */ in kfd_create_indirect_link_prop()
1467 if (kfd_dev_is_large_bar(kdev->gpu)) { in kfd_create_indirect_link_prop()
1468 /* CPU <--> CPU <--> GPU, CPU node*/ in kfd_create_indirect_link_prop()
1471 return -ENOMEM; in kfd_create_indirect_link_prop()
1474 props2->node_from = i; in kfd_create_indirect_link_prop()
1475 props2->node_to = gpu_node; in kfd_create_indirect_link_prop()
1476 props2->kobj = NULL; in kfd_create_indirect_link_prop()
1477 cpu_dev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1478 list_add_tail(&props2->list, &cpu_dev->p2p_link_props); in kfd_create_indirect_link_prop()
1497 kdev->gpu->adev, in kfd_add_peer_prop()
1498 peer->gpu->adev)) in kfd_add_peer_prop()
1501 if (list_empty(&kdev->io_link_props)) in kfd_add_peer_prop()
1502 return -ENODATA; in kfd_add_peer_prop()
1504 iolink1 = list_first_entry(&kdev->io_link_props, in kfd_add_peer_prop()
1507 if (list_empty(&peer->io_link_props)) in kfd_add_peer_prop()
1508 return -ENODATA; in kfd_add_peer_prop()
1510 iolink2 = list_first_entry(&peer->io_link_props, in kfd_add_peer_prop()
1515 return -ENOMEM; in kfd_add_peer_prop()
1519 props->weight = iolink1->weight + iolink2->weight; in kfd_add_peer_prop()
1520 props->min_latency = iolink1->min_latency + iolink2->min_latency; in kfd_add_peer_prop()
1521 props->max_latency = iolink1->max_latency + iolink2->max_latency; in kfd_add_peer_prop()
1522 props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth); in kfd_add_peer_prop()
1523 props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth); in kfd_add_peer_prop()
1525 if (iolink1->node_to != iolink2->node_to) { in kfd_add_peer_prop()
1526 /* CPU->CPU link */ in kfd_add_peer_prop()
1527 cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); in kfd_add_peer_prop()
1529 list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { in kfd_add_peer_prop()
1530 if (iolink3->node_to != iolink2->node_to) in kfd_add_peer_prop()
1533 props->weight += iolink3->weight; in kfd_add_peer_prop()
1534 props->min_latency += iolink3->min_latency; in kfd_add_peer_prop()
1535 props->max_latency += iolink3->max_latency; in kfd_add_peer_prop()
1536 props->min_bandwidth = min(props->min_bandwidth, in kfd_add_peer_prop()
1537 iolink3->min_bandwidth); in kfd_add_peer_prop()
1538 props->max_bandwidth = min(props->max_bandwidth, in kfd_add_peer_prop()
1539 iolink3->max_bandwidth); in kfd_add_peer_prop()
1547 props->node_from = from; in kfd_add_peer_prop()
1548 props->node_to = to; in kfd_add_peer_prop()
1549 peer->node_props.p2p_links_count++; in kfd_add_peer_prop()
1550 list_add_tail(&props->list, &peer->p2p_link_props); in kfd_add_peer_prop()
1574 if (WARN_ON(!new_dev->gpu)) in kfd_dev_create_p2p_links()
1577 k--; in kfd_dev_create_p2p_links()
1579 /* create indirect links */ in kfd_dev_create_p2p_links()
1590 if (!dev->gpu || !dev->gpu->adev || in kfd_dev_create_p2p_links()
1591 (dev->gpu->kfd->hive_id && in kfd_dev_create_p2p_links()
1592 dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id && in kfd_dev_create_p2p_links()
1593 amdgpu_xgmi_get_is_sharing_enabled(dev->gpu->adev, new_dev->gpu->adev))) in kfd_dev_create_p2p_links()
1596 /* check if the nodes are peer accessible in one or both directions */ in kfd_dev_create_p2p_links()
1626 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l1_pcache()
1630 * CU, and in case of a non-shared cache check if the CU is inactive. If in fill_in_l1_pcache()
1636 return -ENOMEM; in fill_in_l1_pcache()
1639 pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); in fill_in_l1_pcache()
1640 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l1_pcache()
1641 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l1_pcache()
1642 pcache->cacheline_size = pcache_info[cache_type].cache_line_size; in fill_in_l1_pcache()
1645 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l1_pcache()
1647 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l1_pcache()
1649 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l1_pcache()
1651 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l1_pcache()
1657 cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l1_pcache()
1659 pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l1_pcache()
1660 pcache->sibling_map[1] = in fill_in_l1_pcache()
1662 pcache->sibling_map[2] = in fill_in_l1_pcache()
1664 pcache->sibling_map[3] = in fill_in_l1_pcache()
1667 pcache->sibling_map_size = 4; in fill_in_l1_pcache()
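For the L1 case the 32-bit cu_sibling_map_mask is simply split into four bytes, so sibling_map describes at most 32 CUs of one shader array. A worked example, assuming a cache shared by CUs 0-3 of the array:

    uint32_t cu_sibling_map_mask = 0x0000000F;      /* CUs 0..3 share this cache */

    pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);          /* 0x0F */
    pcache->sibling_map[1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);   /* 0x00 */
    pcache->sibling_map[2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);  /* 0x00 */
    pcache->sibling_map[3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);  /* 0x00 */
    pcache->sibling_map_size = 4;

kfd_cache_show() above walks these bytes bit by bit, so the sysfs properties file would report this map as "sibling_map 1,1,1,1,0,0,...".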
1686 int num_xcc = NUM_XCC(knode->xcc_mask); in fill_in_l2_l3_pcache()
1689 struct amdgpu_device *adev = knode->adev; in fill_in_l2_l3_pcache()
1692 start = ffs(knode->xcc_mask) - 1; in fill_in_l2_l3_pcache()
1699 for (i = 0; i < gfx_info->max_shader_engines && !found; i++) { in fill_in_l2_l3_pcache()
1700 for (j = 0; j < gfx_info->max_sh_per_se && !found; j++) { in fill_in_l2_l3_pcache()
1701 if (cu_info->bitmap[start][i % 4][j % 4]) { in fill_in_l2_l3_pcache()
1703 cu_info->bitmap[start][i % 4][j % 4]; in fill_in_l2_l3_pcache()
1710 ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1714 * CU, and in case of a non-shared cache check if the CU is inactive. If in fill_in_l2_l3_pcache()
1720 return -ENOMEM; in fill_in_l2_l3_pcache()
1723 pcache->processor_id_low = cu_processor_id in fill_in_l2_l3_pcache()
1724 + (first_active_cu - 1); in fill_in_l2_l3_pcache()
1725 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l2_l3_pcache()
1726 pcache->cacheline_size = pcache_info[cache_type].cache_line_size; in fill_in_l2_l3_pcache()
1731 mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); in fill_in_l2_l3_pcache()
1735 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l2_l3_pcache()
1737 if (mode && pcache->cache_level == 3) in fill_in_l2_l3_pcache()
1738 pcache->cache_size /= mode; in fill_in_l2_l3_pcache()
1741 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l2_l3_pcache()
1743 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l2_l3_pcache()
1745 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l2_l3_pcache()
1747 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l2_l3_pcache()
1752 cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l2_l3_pcache()
1756 for (i = 0; i < gfx_info->max_shader_engines; i++) { in fill_in_l2_l3_pcache()
1757 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in fill_in_l2_l3_pcache()
1758 pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l2_l3_pcache()
1759 pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); in fill_in_l2_l3_pcache()
1760 pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); in fill_in_l2_l3_pcache()
1761 pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); in fill_in_l2_l3_pcache()
1764 cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4]; in fill_in_l2_l3_pcache()
1765 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1769 pcache->sibling_map_size = k; in fill_in_l2_l3_pcache()
1778 /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
1789 struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; in kfd_fill_cache_non_crat_info()
1790 struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; in kfd_fill_cache_non_crat_info()
1798 gpu_processor_id = dev->node_props.simd_id_base; in kfd_fill_cache_non_crat_info()
1817 start = ffs(kdev->xcc_mask) - 1; in kfd_fill_cache_non_crat_info()
1818 end = start + NUM_XCC(kdev->xcc_mask); in kfd_fill_cache_non_crat_info()
1824 for (i = 0; i < gfx_info->max_shader_engines; i++) { in kfd_fill_cache_non_crat_info()
1825 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in kfd_fill_cache_non_crat_info()
1826 for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) { in kfd_fill_cache_non_crat_info()
1829 cu_info->bitmap[xcc][i % 4][j + i / 4], ct, in kfd_fill_cache_non_crat_info()
1837 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1842 gfx_info->max_cu_per_sh) ? in kfd_fill_cache_non_crat_info()
1844 (gfx_info->max_cu_per_sh - k); in kfd_fill_cache_non_crat_info()
1859 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1863 dev->node_props.caches_count += num_of_entries; in kfd_fill_cache_non_crat_info()
1864 pr_debug("Added [%d] GPU cache entries\n", num_of_entries); in kfd_fill_cache_non_crat_info()
1867 static int kfd_topology_add_device_locked(struct kfd_node *gpu, in kfd_topology_add_device_locked() argument
1877 COMPUTE_UNIT_GPU, gpu, in kfd_topology_add_device_locked()
1880 dev_err(gpu->adev->dev, "Error creating VCRAT\n"); in kfd_topology_add_device_locked()
1881 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1891 dev_err(gpu->adev->dev, "Error parsing VCRAT\n"); in kfd_topology_add_device_locked()
1892 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1899 *dev = kfd_assign_gpu(gpu); in kfd_topology_add_device_locked()
1901 res = -ENODEV; in kfd_topology_add_device_locked()
1908 kfd_fill_cache_non_crat_info(*dev, gpu); in kfd_topology_add_device_locked()
1917 dev_err(gpu->adev->dev, "Failed to update GPU to sysfs topology. res=%d\n", in kfd_topology_add_device_locked()
1929 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && in kfd_topology_set_dbg_firmware_support()
1930 KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { in kfd_topology_set_dbg_firmware_support()
1931 uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1934 uint32_t mes_rev = dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1945 switch (KFD_GC_VERSION(dev->gpu)) { in kfd_topology_set_dbg_firmware_support()
1947 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768; in kfd_topology_set_dbg_firmware_support()
1954 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459; in kfd_topology_set_dbg_firmware_support()
1957 firmware_supported = dev->gpu->kfd->mec_fw_version >= 60; in kfd_topology_set_dbg_firmware_support()
1960 firmware_supported = dev->gpu->kfd->mec_fw_version >= 51; in kfd_topology_set_dbg_firmware_support()
1965 firmware_supported = dev->gpu->kfd->mec_fw_version >= 144; in kfd_topology_set_dbg_firmware_support()
1972 firmware_supported = dev->gpu->kfd->mec_fw_version >= 89; in kfd_topology_set_dbg_firmware_support()
1984 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; in kfd_topology_set_dbg_firmware_support()
1989 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << in kfd_topology_set_capabilities()
1993 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT | in kfd_topology_set_capabilities()
1997 if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) in kfd_topology_set_capabilities()
1998 dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; in kfd_topology_set_capabilities()
2000 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { in kfd_topology_set_capabilities()
2001 if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) || in kfd_topology_set_capabilities()
2002 KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4)) in kfd_topology_set_capabilities()
2003 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
2007 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
2011 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) in kfd_topology_set_capabilities()
2012 dev->node_props.capability |= in kfd_topology_set_capabilities()
2015 if (!amdgpu_sriov_vf(dev->gpu->adev)) in kfd_topology_set_capabilities()
2016 dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; in kfd_topology_set_capabilities()
2019 dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | in kfd_topology_set_capabilities()
2022 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0)) in kfd_topology_set_capabilities()
2023 dev->node_props.capability |= in kfd_topology_set_capabilities()
2030 int kfd_topology_add_device(struct kfd_node *gpu) in kfd_topology_add_device() argument
2036 const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; in kfd_topology_add_device()
2037 struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config; in kfd_topology_add_device()
2038 struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info; in kfd_topology_add_device()
2040 if (gpu->xcp && !gpu->xcp->ddev) { in kfd_topology_add_device()
2041 dev_warn(gpu->adev->dev, in kfd_topology_add_device()
2042 "Won't add GPU to topology since it has no drm node assigned."); in kfd_topology_add_device()
2045 dev_dbg(gpu->adev->dev, "Adding new GPU to topology\n"); in kfd_topology_add_device()
2048 /* Check to see if this gpu device exists in the topology_device_list. in kfd_topology_add_device()
2049 * If so, assign the gpu to that device, in kfd_topology_add_device()
2050 * else create a Virtual CRAT for this gpu device and then parse that in kfd_topology_add_device()
2051 * CRAT to create a new topology device. Once created assign the gpu to in kfd_topology_add_device()
2055 dev = kfd_assign_gpu(gpu); in kfd_topology_add_device()
2057 res = kfd_topology_add_device_locked(gpu, &dev); in kfd_topology_add_device()
2062 gpu_id = kfd_generate_gpu_id(gpu); in kfd_topology_add_device()
2063 dev->gpu_id = gpu_id; in kfd_topology_add_device()
2064 gpu->id = gpu_id; in kfd_topology_add_device()
2072 /* Fill-in additional information that is not available in CRAT but in kfd_topology_add_device()
2075 for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { in kfd_topology_add_device()
2076 dev->node_props.name[i] = __tolower(asic_name[i]); in kfd_topology_add_device()
2080 dev->node_props.name[i] = '\0'; in kfd_topology_add_device()
2082 dev->node_props.simd_arrays_per_engine = in kfd_topology_add_device()
2083 gfx_info->max_sh_per_se; in kfd_topology_add_device()
2085 dev->node_props.gfx_target_version = in kfd_topology_add_device()
2086 gpu->kfd->device_info.gfx_target_version; in kfd_topology_add_device()
2087 dev->node_props.vendor_id = gpu->adev->pdev->vendor; in kfd_topology_add_device()
2088 dev->node_props.device_id = gpu->adev->pdev->device; in kfd_topology_add_device()
2089 dev->node_props.capability |= in kfd_topology_add_device()
2090 ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & in kfd_topology_add_device()
2093 dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); in kfd_topology_add_device()
2094 if (gpu->kfd->num_nodes > 1) in kfd_topology_add_device()
2095 dev->node_props.location_id |= dev->gpu->node_id; in kfd_topology_add_device()
2097 dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); in kfd_topology_add_device()
2098 dev->node_props.max_engine_clk_fcompute = in kfd_topology_add_device()
2099 amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); in kfd_topology_add_device()
2100 dev->node_props.max_engine_clk_ccompute = in kfd_topology_add_device()
2103 if (gpu->xcp) in kfd_topology_add_device()
2104 dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index; in kfd_topology_add_device()
2106 dev->node_props.drm_render_minor = in kfd_topology_add_device()
2107 gpu->kfd->shared_resources.drm_render_minor; in kfd_topology_add_device()
2109 dev->node_props.hive_id = gpu->kfd->hive_id; in kfd_topology_add_device()
2110 dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); in kfd_topology_add_device()
2111 dev->node_props.num_sdma_xgmi_engines = in kfd_topology_add_device()
2112 kfd_get_num_xgmi_sdma_engines(gpu); in kfd_topology_add_device()
2113 dev->node_props.num_sdma_queues_per_engine = in kfd_topology_add_device()
2114 gpu->kfd->device_info.num_sdma_queues_per_engine - in kfd_topology_add_device()
2115 gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; in kfd_topology_add_device()
2116 dev->node_props.num_gws = (dev->gpu->gws && in kfd_topology_add_device()
2117 dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? in kfd_topology_add_device()
2118 dev->gpu->adev->gds.gws_size : 0; in kfd_topology_add_device()
2119 dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); in kfd_topology_add_device()
2124 switch (dev->gpu->adev->asic_type) { in kfd_topology_add_device()
2128 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << in kfd_topology_add_device()
2139 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << in kfd_topology_add_device()
2144 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2146 dev->gpu->adev->asic_type); in kfd_topology_add_device()
2155 dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; in kfd_topology_add_device()
2162 if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { in kfd_topology_add_device()
2163 dev->node_props.simd_count = in kfd_topology_add_device()
2164 cu_info->simd_per_cu * cu_info->number; in kfd_topology_add_device()
2165 dev->node_props.max_waves_per_simd = 10; in kfd_topology_add_device()
2169 dev->node_props.capability |= in kfd_topology_add_device()
2170 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? in kfd_topology_add_device()
2172 dev->node_props.capability |= in kfd_topology_add_device()
2173 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? in kfd_topology_add_device()
2176 if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2177 dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? in kfd_topology_add_device()
2180 if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) in kfd_topology_add_device()
2181 dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; in kfd_topology_add_device()
2183 if (dev->gpu->adev->gmc.is_app_apu || in kfd_topology_add_device()
2184 dev->gpu->adev->gmc.xgmi.connected_to_cpu) in kfd_topology_add_device()
2185 dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; in kfd_topology_add_device()
2197 * kfd_topology_update_io_links() - Update IO links after device removal.
2221 if (dev->proximity_domain > proximity_domain) in kfd_topology_update_io_links()
2222 dev->proximity_domain--; in kfd_topology_update_io_links()
2224 list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { in kfd_topology_update_io_links()
2229 if (iolink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2230 list_del(&iolink->list); in kfd_topology_update_io_links()
2231 dev->node_props.io_links_count--; in kfd_topology_update_io_links()
2233 if (iolink->node_from > proximity_domain) in kfd_topology_update_io_links()
2234 iolink->node_from--; in kfd_topology_update_io_links()
2235 if (iolink->node_to > proximity_domain) in kfd_topology_update_io_links()
2236 iolink->node_to--; in kfd_topology_update_io_links()
2240 list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) { in kfd_topology_update_io_links()
2245 if (p2plink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2246 list_del(&p2plink->list); in kfd_topology_update_io_links()
2247 dev->node_props.p2p_links_count--; in kfd_topology_update_io_links()
2249 if (p2plink->node_from > proximity_domain) in kfd_topology_update_io_links()
2250 p2plink->node_from--; in kfd_topology_update_io_links()
2251 if (p2plink->node_to > proximity_domain) in kfd_topology_update_io_links()
2252 p2plink->node_to--; in kfd_topology_update_io_links()
2258 int kfd_topology_remove_device(struct kfd_node *gpu) in kfd_topology_remove_device() argument
2262 int res = -ENODEV; in kfd_topology_remove_device()
2268 if (dev->gpu == gpu) { in kfd_topology_remove_device()
2269 gpu_id = dev->gpu_id; in kfd_topology_remove_device()
2272 sys_props.num_devices--; in kfd_topology_remove_device()
2274 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_remove_device()
2292 /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
2293 topology. If a GPU device is found at @idx, then a valid kfd_dev pointer is
2295 Return - 0: On success (@kdev will be NULL for non-GPU nodes)
2296 * -1: If end of list
2309 *kdev = top_dev->gpu; in kfd_topology_enum_kfd_devices()
2319 return -1; in kfd_topology_enum_kfd_devices()
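A typical caller walks indices until the function returns -1 and treats a NULL *kdev as a CPU-only node; a hypothetical sketch (the index parameter's exact type is not visible in this listing):

    struct kfd_node *kdev;
    unsigned int i = 0;

    while (kfd_topology_enum_kfd_devices(i, &kdev) == 0) {
            if (kdev) {
                    /* topology node i is a GPU; kdev is its kfd_node */
            }
            /* kdev == NULL: CPU-only node, nothing to do */
            i++;
    }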
2328 return -1; in kfd_cpumask_to_apic_id()
2331 return -1; in kfd_cpumask_to_apic_id()
2339 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
2341 * Return -1 on failure
2345 if (numa_node_id == -1) { in kfd_numa_node_to_apic_id()
2363 if (!dev->gpu) { in kfd_debugfs_hqds_by_device()
2368 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_hqds_by_device()
2369 r = dqm_debugfs_hqds(m, dev->gpu->dqm); in kfd_debugfs_hqds_by_device()
2388 if (!dev->gpu) { in kfd_debugfs_rls_by_device()
2393 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_rls_by_device()
2394 r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr); in kfd_debugfs_rls_by_device()