Lines Matching +full:pcs +full:- +full:handle

127 [0x00] = "XGMI PCS DataLossErr",
128 [0x01] = "XGMI PCS TrainingErr",
129 [0x02] = "XGMI PCS FlowCtrlAckErr",
130 [0x03] = "XGMI PCS RxFifoUnderflowErr",
131 [0x04] = "XGMI PCS RxFifoOverflowErr",
132 [0x05] = "XGMI PCS CRCErr",
133 [0x06] = "XGMI PCS BERExceededErr",
134 [0x07] = "XGMI PCS TxMetaDataErr",
135 [0x08] = "XGMI PCS ReplayBufParityErr",
136 [0x09] = "XGMI PCS DataParityErr",
137 [0x0a] = "XGMI PCS ReplayFifoOverflowErr",
138 [0x0b] = "XGMI PCS ReplayFifoUnderflowErr",
139 [0x0c] = "XGMI PCS ElasticFifoOverflowErr",
140 [0x0d] = "XGMI PCS DeskewErr",
141 [0x0e] = "XGMI PCS FlowCtrlCRCErr",
142 [0x0f] = "XGMI PCS DataStartupLimitErr",
143 [0x10] = "XGMI PCS FCInitTimeoutErr",
144 [0x11] = "XGMI PCS RecoveryTimeoutErr",
145 [0x12] = "XGMI PCS ReadySerialTimeoutErr",
146 [0x13] = "XGMI PCS ReadySerialAttemptErr",
147 [0x14] = "XGMI PCS RecoveryAttemptErr",
148 [0x15] = "XGMI PCS RecoveryRelockAttemptErr",
149 [0x16] = "XGMI PCS ReplayAttemptErr",
150 [0x17] = "XGMI PCS SyncHdrErr",
151 [0x18] = "XGMI PCS TxReplayTimeoutErr",
152 [0x19] = "XGMI PCS RxReplayTimeoutErr",
153 [0x1a] = "XGMI PCS LinkSubTxTimeoutErr",
154 [0x1b] = "XGMI PCS LinkSubRxTimeoutErr",
155 [0x1c] = "XGMI PCS RxCMDPktErr",
159 {"XGMI PCS DataLossErr",
161 {"XGMI PCS TrainingErr",
163 {"XGMI PCS CRCErr",
165 {"XGMI PCS BERExceededErr",
167 {"XGMI PCS TxMetaDataErr",
169 {"XGMI PCS ReplayBufParityErr",
171 {"XGMI PCS DataParityErr",
173 {"XGMI PCS ReplayFifoOverflowErr",
175 {"XGMI PCS ReplayFifoUnderflowErr",
177 {"XGMI PCS ElasticFifoOverflowErr",
179 {"XGMI PCS DeskewErr",
181 {"XGMI PCS DataStartupLimitErr",
183 {"XGMI PCS FCInitTimeoutErr",
185 {"XGMI PCS RecoveryTimeoutErr",
187 {"XGMI PCS ReadySerialTimeoutErr",
189 {"XGMI PCS ReadySerialAttemptErr",
191 {"XGMI PCS RecoveryAttemptErr",
193 {"XGMI PCS RecoveryRelockAttemptErr",
198 {"WAFL PCS DataLossErr",
200 {"WAFL PCS TrainingErr",
202 {"WAFL PCS CRCErr",
204 {"WAFL PCS BERExceededErr",
206 {"WAFL PCS TxMetaDataErr",
208 {"WAFL PCS ReplayBufParityErr",
210 {"WAFL PCS DataParityErr",
212 {"WAFL PCS ReplayFifoOverflowErr",
214 {"WAFL PCS ReplayFifoUnderflowErr",
216 {"WAFL PCS ElasticFifoOverflowErr",
218 {"WAFL PCS DeskewErr",
220 {"WAFL PCS DataStartupLimitErr",
222 {"WAFL PCS FCInitTimeoutErr",
224 {"WAFL PCS RecoveryTimeoutErr",
226 {"WAFL PCS ReadySerialTimeoutErr",
228 {"WAFL PCS ReadySerialAttemptErr",
230 {"WAFL PCS RecoveryAttemptErr",
232 {"WAFL PCS RecoveryRelockAttemptErr",
237 {"XGMI3X16 PCS DataLossErr",
239 {"XGMI3X16 PCS TrainingErr",
241 {"XGMI3X16 PCS FlowCtrlAckErr",
243 {"XGMI3X16 PCS RxFifoUnderflowErr",
245 {"XGMI3X16 PCS RxFifoOverflowErr",
247 {"XGMI3X16 PCS CRCErr",
249 {"XGMI3X16 PCS BERExceededErr",
251 {"XGMI3X16 PCS TxVcidDataErr",
253 {"XGMI3X16 PCS ReplayBufParityErr",
255 {"XGMI3X16 PCS DataParityErr",
257 {"XGMI3X16 PCS ReplayFifoOverflowErr",
259 {"XGMI3X16 PCS ReplayFifoUnderflowErr",
261 {"XGMI3X16 PCS ElasticFifoOverflowErr",
263 {"XGMI3X16 PCS DeskewErr",
265 {"XGMI3X16 PCS FlowCtrlCRCErr",
267 {"XGMI3X16 PCS DataStartupLimitErr",
269 {"XGMI3X16 PCS FCInitTimeoutErr",
271 {"XGMI3X16 PCS RecoveryTimeoutErr",
273 {"XGMI3X16 PCS ReadySerialTimeoutErr",
275 {"XGMI3X16 PCS ReadySerialAttemptErr",
277 {"XGMI3X16 PCS RecoveryAttemptErr",
279 {"XGMI3X16 PCS RecoveryRelockAttemptErr",
281 {"XGMI3X16 PCS ReplayAttemptErr",
283 {"XGMI3X16 PCS SyncHdrErr",
285 {"XGMI3X16 PCS TxReplayTimeoutErr",
287 {"XGMI3X16 PCS RxReplayTimeoutErr",
289 {"XGMI3X16 PCS LinkSubTxTimeoutErr",
291 {"XGMI3X16 PCS LinkSubRxTimeoutErr",
293 {"XGMI3X16 PCS RxCMDPktErr",
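
The tables above associate PCS error codes and status fields with human-readable names for the XGMI, WAFL and XGMI3X16 links; the error-reporting paths later in the file look up the name for a reported bit or code and log "<name> detected" (see lines 1190, 1385 and 1504 below). The fragment that follows is a minimal, hedged sketch of that lookup, not the driver's actual types: pcs_err_names[] and report_pcs_errors() are hypothetical names, the table is truncated to three entries, and printf() stands in for dev_info().

#include <stdint.h>
#include <stdio.h>

/* Truncated illustration of an indexed name table like the ones above. */
static const char *const pcs_err_names[] = {
	[0x00] = "XGMI PCS DataLossErr",
	[0x01] = "XGMI PCS TrainingErr",
	[0x05] = "XGMI PCS CRCErr",
	/* ... remaining indices elided ... */
};

/* Walk a raw status word and report every set bit that has a name. */
static unsigned int report_pcs_errors(uint32_t status)
{
	unsigned int i, count = 0;

	for (i = 0; i < sizeof(pcs_err_names) / sizeof(pcs_err_names[0]); i++) {
		if (!(status & (1u << i)) || !pcs_err_names[i])
			continue;
		printf("%s detected\n", pcs_err_names[i]);
		count++;
	}
	return count;
}

int main(void)
{
	/* Bits 0 and 5 set -> DataLossErr and CRCErr are reported. */
	return report_pcs_errors((1u << 0) | (1u << 5)) == 2 ? 0 : 1;
}
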
308 return -EINVAL; in amdgpu_xgmi_get_ext_link()
311 return -EINVAL; in amdgpu_xgmi_get_ext_link()
336 addr += adev->asic_funcs->encode_ext_smn_addressing(i); in xgmi_v6_4_get_link_status()
351 return -EOPNOTSUPP; in amdgpu_get_xgmi_link_status()
355 return -ENOLINK; in amdgpu_get_xgmi_link_status()
368 * hive ID and individual node IDs, both of which are 64-bit numbers.
373 * Inside the device directory a sub-directory 'xgmi_hive_info' is
383 * set of node sub-directories.
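
The DOC comment excerpted above (source lines 368-383) describes the sysfs layout this file builds: each XGMI-capable device exposes per-node attributes, and an 'xgmi_hive_info' directory linked from the device directory holds the hive-wide information plus one node link per member. Below is a hedged userspace sketch for reading those attributes; the /sys/class/drm/card0/device base path and the xgmi_hive_info/xgmi_hive_id attribute name are assumptions for illustration, while the per-device file names follow from the device_create_file() calls shown further down (xgmi_device_id, xgmi_physical_id, xgmi_error, xgmi_num_hops, xgmi_num_links, xgmi_port_num).

#include <stdio.h>

/* Print one sysfs attribute if it exists; silently skip it otherwise. */
static void dump_attr(const char *base, const char *name)
{
	char path[256], buf[128];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", base, name);
	f = fopen(path, "r");
	if (!f)
		return;
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", name, buf);
	fclose(f);
}

int main(void)
{
	/* Assumed base path; pick the cardN that maps to the XGMI GPU. */
	const char *dev = "/sys/class/drm/card0/device";

	dump_attr(dev, "xgmi_device_id");
	dump_attr(dev, "xgmi_physical_id");
	dump_attr(dev, "xgmi_num_hops");
	dump_attr(dev, "xgmi_num_links");
	/* Hive-wide id via the xgmi_hive_info link; attribute name assumed. */
	dump_attr(dev, "xgmi_hive_info/xgmi_hive_id");
	return 0;
}
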
408 return snprintf(buf, PAGE_SIZE, "%llu\n", hive->hive_id); in amdgpu_xgmi_show_attrs()
418 amdgpu_reset_put_reset_domain(hive->reset_domain); in amdgpu_xgmi_hive_release()
419 hive->reset_domain = NULL; in amdgpu_xgmi_hive_release()
421 mutex_destroy(&hive->hive_lock); in amdgpu_xgmi_hive_release()
442 return sysfs_emit(buf, "%llu\n", adev->gmc.xgmi.node_id); in amdgpu_xgmi_show_device_id()
453 return sysfs_emit(buf, "%u\n", adev->gmc.xgmi.physical_node_id); in amdgpu_xgmi_show_physical_id()
463 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; in amdgpu_xgmi_show_num_hops()
466 for (i = 0; i < top->num_nodes; i++) in amdgpu_xgmi_show_num_hops()
467 sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops); in amdgpu_xgmi_show_num_hops()
478 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; in amdgpu_xgmi_show_num_links()
481 for (i = 0; i < top->num_nodes; i++) in amdgpu_xgmi_show_num_links()
482 sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links); in amdgpu_xgmi_show_num_links()
493 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; in amdgpu_xgmi_show_connected_port_num()
501 for (i = 0; i < top->num_nodes; i++) { in amdgpu_xgmi_show_connected_port_num()
502 if (top->nodes[i].node_id == adev->gmc.xgmi.node_id) { in amdgpu_xgmi_show_connected_port_num()
508 if (i == top->num_nodes) in amdgpu_xgmi_show_connected_port_num()
509 return -EINVAL; in amdgpu_xgmi_show_connected_port_num()
511 for (i = 0; i < top->num_nodes; i++) { in amdgpu_xgmi_show_connected_port_num()
512 for (j = 0; j < top->nodes[i].num_links; j++) in amdgpu_xgmi_show_connected_port_num()
514 size += sysfs_emit_at(buf, size, "%02x:%02x -> %02x:%02x\n", current_node + 1, in amdgpu_xgmi_show_connected_port_num()
515 top->nodes[i].port_num[j].src_xgmi_port_num, i + 1, in amdgpu_xgmi_show_connected_port_num()
516 top->nodes[i].port_num[j].dst_xgmi_port_num); in amdgpu_xgmi_show_connected_port_num()
536 if ((!adev->df.funcs) || in amdgpu_xgmi_show_error()
537 (!adev->df.funcs->get_fica) || in amdgpu_xgmi_show_error()
538 (!adev->df.funcs->set_fica)) in amdgpu_xgmi_show_error()
539 return -EINVAL; in amdgpu_xgmi_show_error()
541 fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in); in amdgpu_xgmi_show_error()
545 fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in); in amdgpu_xgmi_show_error()
550 adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); in amdgpu_xgmi_show_error()
570 ret = device_create_file(adev->dev, &dev_attr_xgmi_device_id); in amdgpu_xgmi_sysfs_add_dev_info()
572 dev_err(adev->dev, "XGMI: Failed to create device file xgmi_device_id\n"); in amdgpu_xgmi_sysfs_add_dev_info()
576 ret = device_create_file(adev->dev, &dev_attr_xgmi_physical_id); in amdgpu_xgmi_sysfs_add_dev_info()
578 dev_err(adev->dev, "XGMI: Failed to create device file xgmi_physical_id\n"); in amdgpu_xgmi_sysfs_add_dev_info()
583 ret = device_create_file(adev->dev, &dev_attr_xgmi_error); in amdgpu_xgmi_sysfs_add_dev_info()
588 ret = device_create_file(adev->dev, &dev_attr_xgmi_num_hops); in amdgpu_xgmi_sysfs_add_dev_info()
593 ret = device_create_file(adev->dev, &dev_attr_xgmi_num_links); in amdgpu_xgmi_sysfs_add_dev_info()
598 if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) { in amdgpu_xgmi_sysfs_add_dev_info()
599 ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num); in amdgpu_xgmi_sysfs_add_dev_info()
601 dev_err(adev->dev, "failed to create xgmi_port_num\n"); in amdgpu_xgmi_sysfs_add_dev_info()
605 if (hive->kobj.parent != (&adev->dev->kobj)) { in amdgpu_xgmi_sysfs_add_dev_info()
606 ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj, in amdgpu_xgmi_sysfs_add_dev_info()
609 dev_err(adev->dev, "XGMI: Failed to create link to hive info"); in amdgpu_xgmi_sysfs_add_dev_info()
614 sprintf(node, "node%d", atomic_read(&hive->number_devices)); in amdgpu_xgmi_sysfs_add_dev_info()
616 ret = sysfs_create_link(&hive->kobj, &adev->dev->kobj, node); in amdgpu_xgmi_sysfs_add_dev_info()
618 dev_err(adev->dev, "XGMI: Failed to create link from hive info"); in amdgpu_xgmi_sysfs_add_dev_info()
626 sysfs_remove_link(&adev->dev->kobj, adev_to_drm(adev)->unique); in amdgpu_xgmi_sysfs_add_dev_info()
629 device_remove_file(adev->dev, &dev_attr_xgmi_device_id); in amdgpu_xgmi_sysfs_add_dev_info()
630 device_remove_file(adev->dev, &dev_attr_xgmi_physical_id); in amdgpu_xgmi_sysfs_add_dev_info()
631 device_remove_file(adev->dev, &dev_attr_xgmi_error); in amdgpu_xgmi_sysfs_add_dev_info()
632 device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); in amdgpu_xgmi_sysfs_add_dev_info()
633 device_remove_file(adev->dev, &dev_attr_xgmi_num_links); in amdgpu_xgmi_sysfs_add_dev_info()
634 if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) in amdgpu_xgmi_sysfs_add_dev_info()
635 device_remove_file(adev->dev, &dev_attr_xgmi_port_num); in amdgpu_xgmi_sysfs_add_dev_info()
647 device_remove_file(adev->dev, &dev_attr_xgmi_device_id); in amdgpu_xgmi_sysfs_rem_dev_info()
648 device_remove_file(adev->dev, &dev_attr_xgmi_physical_id); in amdgpu_xgmi_sysfs_rem_dev_info()
649 device_remove_file(adev->dev, &dev_attr_xgmi_error); in amdgpu_xgmi_sysfs_rem_dev_info()
650 device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); in amdgpu_xgmi_sysfs_rem_dev_info()
651 device_remove_file(adev->dev, &dev_attr_xgmi_num_links); in amdgpu_xgmi_sysfs_rem_dev_info()
652 if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) in amdgpu_xgmi_sysfs_rem_dev_info()
653 device_remove_file(adev->dev, &dev_attr_xgmi_port_num); in amdgpu_xgmi_sysfs_rem_dev_info()
655 if (hive->kobj.parent != (&adev->dev->kobj)) in amdgpu_xgmi_sysfs_rem_dev_info()
656 sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info"); in amdgpu_xgmi_sysfs_rem_dev_info()
658 sprintf(node, "node%d", atomic_read(&hive->number_devices)); in amdgpu_xgmi_sysfs_rem_dev_info()
659 sysfs_remove_link(&hive->kobj, node); in amdgpu_xgmi_sysfs_rem_dev_info()
670 if (!adev->gmc.xgmi.hive_id) in amdgpu_get_xgmi_hive()
673 if (adev->hive) { in amdgpu_get_xgmi_hive()
674 kobject_get(&adev->hive->kobj); in amdgpu_get_xgmi_hive()
675 return adev->hive; in amdgpu_get_xgmi_hive()
681 if (hive->hive_id == adev->gmc.xgmi.hive_id) in amdgpu_get_xgmi_hive()
687 dev_err(adev->dev, "XGMI: allocation failed\n"); in amdgpu_get_xgmi_hive()
688 ret = -ENOMEM; in amdgpu_get_xgmi_hive()
694 ret = kobject_init_and_add(&hive->kobj, in amdgpu_get_xgmi_hive()
696 &adev->dev->kobj, in amdgpu_get_xgmi_hive()
699 dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n"); in amdgpu_get_xgmi_hive()
700 kobject_put(&hive->kobj); in amdgpu_get_xgmi_hive()
706 * Only init hive->reset_domain for non-SRIOV configuration. For SRIOV, in amdgpu_get_xgmi_hive()
715 * See https://www.spinics.net/lists/amd-gfx/msg58836.html in amdgpu_get_xgmi_hive()
717 if (adev->reset_domain->type != XGMI_HIVE) { in amdgpu_get_xgmi_hive()
718 hive->reset_domain = in amdgpu_get_xgmi_hive()
719 amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive"); in amdgpu_get_xgmi_hive()
720 if (!hive->reset_domain) { in amdgpu_get_xgmi_hive()
721 dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n"); in amdgpu_get_xgmi_hive()
722 ret = -ENOMEM; in amdgpu_get_xgmi_hive()
723 kobject_put(&hive->kobj); in amdgpu_get_xgmi_hive()
728 amdgpu_reset_get_reset_domain(adev->reset_domain); in amdgpu_get_xgmi_hive()
729 hive->reset_domain = adev->reset_domain; in amdgpu_get_xgmi_hive()
733 hive->hive_id = adev->gmc.xgmi.hive_id; in amdgpu_get_xgmi_hive()
734 INIT_LIST_HEAD(&hive->device_list); in amdgpu_get_xgmi_hive()
735 INIT_LIST_HEAD(&hive->node); in amdgpu_get_xgmi_hive()
736 mutex_init(&hive->hive_lock); in amdgpu_get_xgmi_hive()
737 atomic_set(&hive->number_devices, 0); in amdgpu_get_xgmi_hive()
738 task_barrier_init(&hive->tb); in amdgpu_get_xgmi_hive()
739 hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN; in amdgpu_get_xgmi_hive()
740 hive->hi_req_gpu = NULL; in amdgpu_get_xgmi_hive()
741 atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE); in amdgpu_get_xgmi_hive()
747 hive->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE; in amdgpu_get_xgmi_hive()
748 list_add_tail(&hive->node, &xgmi_hive_list); in amdgpu_get_xgmi_hive()
752 kobject_get(&hive->kobj); in amdgpu_get_xgmi_hive()
760 kobject_put(&hive->kobj); in amdgpu_put_xgmi_hive()
775 request_adev = hive->hi_req_gpu ? hive->hi_req_gpu : adev; in amdgpu_xgmi_set_pstate()
776 init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; in amdgpu_xgmi_set_pstate()
781 if (!hive || adev->asic_type != CHIP_VEGA20) in amdgpu_xgmi_set_pstate()
784 mutex_lock(&hive->hive_lock); in amdgpu_xgmi_set_pstate()
787 hive->hi_req_count++; in amdgpu_xgmi_set_pstate()
789 hive->hi_req_count--; in amdgpu_xgmi_set_pstate()
795 if (hive->pstate == pstate || in amdgpu_xgmi_set_pstate()
796 (!is_hi_req && hive->hi_req_count && !init_low)) in amdgpu_xgmi_set_pstate()
799 dev_dbg(request_adev->dev, "Set xgmi pstate %d.\n", pstate); in amdgpu_xgmi_set_pstate()
803 dev_err(request_adev->dev, in amdgpu_xgmi_set_pstate()
805 request_adev->gmc.xgmi.node_id, in amdgpu_xgmi_set_pstate()
806 request_adev->gmc.xgmi.hive_id, ret); in amdgpu_xgmi_set_pstate()
811 hive->pstate = hive->hi_req_count ? in amdgpu_xgmi_set_pstate()
812 hive->pstate : AMDGPU_XGMI_PSTATE_MIN; in amdgpu_xgmi_set_pstate()
814 hive->pstate = pstate; in amdgpu_xgmi_set_pstate()
815 hive->hi_req_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ? in amdgpu_xgmi_set_pstate()
819 mutex_unlock(&hive->hive_lock); in amdgpu_xgmi_set_pstate()
831 ret = psp_xgmi_set_topology_info(&adev->psp, in amdgpu_xgmi_update_topology()
832 atomic_read(&hive->number_devices), in amdgpu_xgmi_update_topology()
833 &adev->psp.xgmi_context.top_info); in amdgpu_xgmi_update_topology()
835 dev_err(adev->dev, in amdgpu_xgmi_update_topology()
837 adev->gmc.xgmi.node_id, in amdgpu_xgmi_update_topology()
838 adev->gmc.xgmi.hive_id, ret); in amdgpu_xgmi_update_topology()
853 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; in amdgpu_xgmi_get_hops_count()
857 if (!adev->gmc.xgmi.supported) in amdgpu_xgmi_get_hops_count()
860 for (i = 0 ; i < top->num_nodes; ++i) in amdgpu_xgmi_get_hops_count()
861 if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) in amdgpu_xgmi_get_hops_count()
862 return top->nodes[i].num_hops & num_hops_mask; in amdgpu_xgmi_get_hops_count()
864 dev_err(adev->dev, "Failed to get xgmi hops count for peer %d.\n", in amdgpu_xgmi_get_hops_count()
865 peer_adev->gmc.xgmi.physical_node_id); in amdgpu_xgmi_get_hops_count()
876 int num_lanes = adev->gmc.xgmi.max_width; in amdgpu_xgmi_get_bandwidth()
877 int speed = adev->gmc.xgmi.max_speed; in amdgpu_xgmi_get_bandwidth()
878 int num_links = !peer_mode ? 1 : -1; in amdgpu_xgmi_get_bandwidth()
881 return -EINVAL; in amdgpu_xgmi_get_bandwidth()
886 if (!adev->gmc.xgmi.supported) in amdgpu_xgmi_get_bandwidth()
887 return -ENODATA; in amdgpu_xgmi_get_bandwidth()
890 return -EINVAL; in amdgpu_xgmi_get_bandwidth()
893 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; in amdgpu_xgmi_get_bandwidth()
896 for (i = 0 ; i < top->num_nodes; ++i) { in amdgpu_xgmi_get_bandwidth()
897 if (top->nodes[i].node_id != peer_adev->gmc.xgmi.node_id) in amdgpu_xgmi_get_bandwidth()
900 num_links = top->nodes[i].num_links; in amdgpu_xgmi_get_bandwidth()
905 if (num_links == -1) { in amdgpu_xgmi_get_bandwidth()
906 dev_err(adev->dev, "Failed to get number of xgmi links for peer %d.\n", in amdgpu_xgmi_get_bandwidth()
907 peer_adev->gmc.xgmi.physical_node_id); in amdgpu_xgmi_get_bandwidth()
921 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; in amdgpu_xgmi_get_is_sharing_enabled()
924 /* Sharing should always be enabled for non-SRIOV. */ in amdgpu_xgmi_get_is_sharing_enabled()
928 for (i = 0 ; i < top->num_nodes; ++i) in amdgpu_xgmi_get_is_sharing_enabled()
929 if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) in amdgpu_xgmi_get_is_sharing_enabled()
930 return !!top->nodes[i].is_sharing_enabled; in amdgpu_xgmi_get_is_sharing_enabled()
939 * Hive locks and conditions apply - see amdgpu_xgmi_add_device
947 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { in amdgpu_xgmi_initialize_hive_get_data_partition()
948 ret = psp_xgmi_initialize(&tmp_adev->psp, set_extended_data, false); in amdgpu_xgmi_initialize_hive_get_data_partition()
950 dev_err(tmp_adev->dev, in amdgpu_xgmi_initialize_hive_get_data_partition()
964 struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info; in amdgpu_xgmi_fill_topology_info()
965 struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info; in amdgpu_xgmi_fill_topology_info()
967 for (int i = 0; i < peer_info->num_nodes; i++) { in amdgpu_xgmi_fill_topology_info()
968 if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) { in amdgpu_xgmi_fill_topology_info()
969 for (int j = 0; j < top_info->num_nodes; j++) { in amdgpu_xgmi_fill_topology_info()
970 if (top_info->nodes[j].node_id == peer_adev->gmc.xgmi.node_id) { in amdgpu_xgmi_fill_topology_info()
971 peer_info->nodes[i].num_hops = top_info->nodes[j].num_hops; in amdgpu_xgmi_fill_topology_info()
972 peer_info->nodes[i].is_sharing_enabled = in amdgpu_xgmi_fill_topology_info()
973 top_info->nodes[j].is_sharing_enabled; in amdgpu_xgmi_fill_topology_info()
974 peer_info->nodes[i].num_links = in amdgpu_xgmi_fill_topology_info()
975 top_info->nodes[j].num_links; in amdgpu_xgmi_fill_topology_info()
992 if (!adev->gmc.xgmi.supported) in amdgpu_xgmi_add_device()
996 ret = psp_xgmi_initialize(&adev->psp, false, true); in amdgpu_xgmi_add_device()
998 dev_err(adev->dev, in amdgpu_xgmi_add_device()
1003 ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); in amdgpu_xgmi_add_device()
1005 dev_err(adev->dev, in amdgpu_xgmi_add_device()
1010 ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); in amdgpu_xgmi_add_device()
1012 dev_err(adev->dev, in amdgpu_xgmi_add_device()
1017 adev->gmc.xgmi.hive_id = 16; in amdgpu_xgmi_add_device()
1018 adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16; in amdgpu_xgmi_add_device()
1023 ret = -EINVAL; in amdgpu_xgmi_add_device()
1024 dev_err(adev->dev, in amdgpu_xgmi_add_device()
1026 adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id); in amdgpu_xgmi_add_device()
1029 mutex_lock(&hive->hive_lock); in amdgpu_xgmi_add_device()
1031 top_info = &adev->psp.xgmi_context.top_info; in amdgpu_xgmi_add_device()
1033 list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); in amdgpu_xgmi_add_device()
1034 list_for_each_entry(entry, &hive->device_list, head) in amdgpu_xgmi_add_device()
1035 top_info->nodes[count++].node_id = entry->node_id; in amdgpu_xgmi_add_device()
1036 top_info->num_nodes = count; in amdgpu_xgmi_add_device()
1037 atomic_set(&hive->number_devices, count); in amdgpu_xgmi_add_device()
1039 task_barrier_add_task(&hive->tb); in amdgpu_xgmi_add_device()
1042 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { in amdgpu_xgmi_add_device()
1045 top_info = &tmp_adev->psp.xgmi_context.top_info; in amdgpu_xgmi_add_device()
1046 top_info->nodes[count - 1].node_id = in amdgpu_xgmi_add_device()
1047 adev->gmc.xgmi.node_id; in amdgpu_xgmi_add_device()
1048 top_info->num_nodes = count; in amdgpu_xgmi_add_device()
1056 adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) { in amdgpu_xgmi_add_device()
1058 ret = psp_xgmi_get_topology_info(&adev->psp, count, in amdgpu_xgmi_add_device()
1059 &adev->psp.xgmi_context.top_info, false); in amdgpu_xgmi_add_device()
1061 dev_err(adev->dev, in amdgpu_xgmi_add_device()
1063 adev->gmc.xgmi.node_id, in amdgpu_xgmi_add_device()
1064 adev->gmc.xgmi.hive_id, ret); in amdgpu_xgmi_add_device()
1070 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { in amdgpu_xgmi_add_device()
1075 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { in amdgpu_xgmi_add_device()
1076 ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, in amdgpu_xgmi_add_device()
1077 &tmp_adev->psp.xgmi_context.top_info, false); in amdgpu_xgmi_add_device()
1079 dev_err(tmp_adev->dev, in amdgpu_xgmi_add_device()
1081 tmp_adev->gmc.xgmi.node_id, in amdgpu_xgmi_add_device()
1082 tmp_adev->gmc.xgmi.hive_id, ret); in amdgpu_xgmi_add_device()
1090 if (adev->psp.xgmi_context.supports_extended_data) { in amdgpu_xgmi_add_device()
1098 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { in amdgpu_xgmi_add_device()
1099 ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, in amdgpu_xgmi_add_device()
1100 &tmp_adev->psp.xgmi_context.top_info, true); in amdgpu_xgmi_add_device()
1102 dev_err(tmp_adev->dev, in amdgpu_xgmi_add_device()
1104 tmp_adev->gmc.xgmi.node_id, in amdgpu_xgmi_add_device()
1105 tmp_adev->gmc.xgmi.hive_id, ret); in amdgpu_xgmi_add_device()
1110 /* initialize the hive to get non-extended data for the next round. */ in amdgpu_xgmi_add_device()
1122 mutex_unlock(&hive->hive_lock); in amdgpu_xgmi_add_device()
1125 adev->hive = hive; in amdgpu_xgmi_add_device()
1126 dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", in amdgpu_xgmi_add_device()
1127 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); in amdgpu_xgmi_add_device()
1130 dev_err(adev->dev, "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n", in amdgpu_xgmi_add_device()
1131 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, in amdgpu_xgmi_add_device()
1140 struct amdgpu_hive_info *hive = adev->hive; in amdgpu_xgmi_remove_device()
1142 if (!adev->gmc.xgmi.supported) in amdgpu_xgmi_remove_device()
1143 return -EINVAL; in amdgpu_xgmi_remove_device()
1146 return -EINVAL; in amdgpu_xgmi_remove_device()
1148 mutex_lock(&hive->hive_lock); in amdgpu_xgmi_remove_device()
1149 task_barrier_rem_task(&hive->tb); in amdgpu_xgmi_remove_device()
1151 if (hive->hi_req_gpu == adev) in amdgpu_xgmi_remove_device()
1152 hive->hi_req_gpu = NULL; in amdgpu_xgmi_remove_device()
1153 list_del(&adev->gmc.xgmi.head); in amdgpu_xgmi_remove_device()
1154 mutex_unlock(&hive->hive_lock); in amdgpu_xgmi_remove_device()
1157 adev->hive = NULL; in amdgpu_xgmi_remove_device()
1159 if (atomic_dec_return(&hive->number_devices) == 0) { in amdgpu_xgmi_remove_device()
1162 list_del(&hive->node); in amdgpu_xgmi_remove_device()
1171 static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, in xgmi_v6_4_0_aca_bank_parser() argument
1174 struct amdgpu_device *adev = handle->adev; in xgmi_v6_4_0_aca_bank_parser()
1184 status = bank->regs[ACA_REG_IDX_STATUS]; in xgmi_v6_4_0_aca_bank_parser()
1190 dev_info(adev->dev, "%s detected\n", error_str); in xgmi_v6_4_0_aca_bank_parser()
1192 count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); in xgmi_v6_4_0_aca_bank_parser()
1199 bank->aca_err_type = ACA_ERROR_TYPE_UE; in xgmi_v6_4_0_aca_bank_parser()
1200 ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); in xgmi_v6_4_0_aca_bank_parser()
1204 bank->aca_err_type = ACA_ERROR_TYPE_CE; in xgmi_v6_4_0_aca_bank_parser()
1205 ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count); in xgmi_v6_4_0_aca_bank_parser()
1208 return -EINVAL; in xgmi_v6_4_0_aca_bank_parser()
1228 if (!adev->gmc.xgmi.supported || in amdgpu_xgmi_ras_late_init()
1229 adev->gmc.xgmi.num_physical_nodes == 0) in amdgpu_xgmi_ras_late_init()
1261 struct amdgpu_xgmi *xgmi = &adev->gmc.xgmi; in amdgpu_xgmi_get_relative_phy_addr()
1262 return (addr + xgmi->physical_node_id * xgmi->node_segment_size); in amdgpu_xgmi_get_relative_phy_addr()
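
amdgpu_xgmi_get_relative_phy_addr() (source line 1262 above) offsets a local address by physical_node_id * node_segment_size to place it in the hive-wide physical view. A tiny worked example with assumed values, purely for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t node_segment_size = 64ULL << 30;	/* assumed 64 GiB */
	const uint64_t physical_node_id = 2;		/* assumed node id */
	uint64_t addr = 0x1000;

	/* addr + node_id * segment_size -> 0x2000001000 for these values. */
	printf("0x%llx\n",
	       (unsigned long long)(addr + physical_node_id * node_segment_size));
	return 0;
}
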
1275 switch (adev->asic_type) { in amdgpu_xgmi_legacy_reset_ras_error_count()
1327 for_each_inst(i, adev->aid_mask) in xgmi_v6_4_0_reset_ras_error_count()
1378 /* query xgmi/wafl pcs error status, in amdgpu_xgmi_query_pcs_error_status()
1385 dev_info(adev->dev, "%s detected\n", in amdgpu_xgmi_query_pcs_error_status()
1408 err_data->ue_count = 0; in amdgpu_xgmi_legacy_query_ras_error_count()
1409 err_data->ce_count = 0; in amdgpu_xgmi_legacy_query_ras_error_count()
1411 switch (adev->asic_type) { in amdgpu_xgmi_legacy_query_ras_error_count()
1413 /* check xgmi pcs error */ in amdgpu_xgmi_legacy_query_ras_error_count()
1420 /* check wafl pcs error */ in amdgpu_xgmi_legacy_query_ras_error_count()
1429 /* check xgmi pcs error */ in amdgpu_xgmi_legacy_query_ras_error_count()
1436 /* check wafl pcs error */ in amdgpu_xgmi_legacy_query_ras_error_count()
1445 /* check xgmi3x16 pcs error */ in amdgpu_xgmi_legacy_query_ras_error_count()
1454 /* check wafl pcs error */ in amdgpu_xgmi_legacy_query_ras_error_count()
1472 /* check xgmi3x16 pcs error */ in amdgpu_xgmi_legacy_query_ras_error_count()
1484 dev_warn(adev->dev, "XGMI RAS error query not supported"); in amdgpu_xgmi_legacy_query_ras_error_count()
1490 err_data->ue_count += ue_cnt; in amdgpu_xgmi_legacy_query_ras_error_count()
1491 err_data->ce_count += ce_cnt; in amdgpu_xgmi_legacy_query_ras_error_count()
1504 dev_info(adev->dev, "%s detected\n", error_str); in xgmi_v6_4_0_pcs_mca_get_error_type()
1512 return -EINVAL; in xgmi_v6_4_0_pcs_mca_get_error_type()
1515 return -EINVAL; in xgmi_v6_4_0_pcs_mca_get_error_type()
1521 int xgmi_inst = mcm_info->die_id; in __xgmi_v6_4_0_query_error_count()
1545 .socket_id = adev->smuio.funcs->get_socket_id(adev), in xgmi_v6_4_0_query_error_count()
1559 for_each_inst(i, adev->aid_mask) in xgmi_v6_4_0_query_ras_error_count()
1586 dev_warn(adev->dev, "Failed to disallow df cstate"); in amdgpu_ras_error_inject_xgmi()
1589 if (ret1 && ret1 != -EOPNOTSUPP) in amdgpu_ras_error_inject_xgmi()
1590 dev_warn(adev->dev, "Failed to disallow XGMI power down"); in amdgpu_ras_error_inject_xgmi()
1592 ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); in amdgpu_ras_error_inject_xgmi()
1598 if (ret1 && ret1 != -EOPNOTSUPP) in amdgpu_ras_error_inject_xgmi()
1599 dev_warn(adev->dev, "Failed to allow XGMI power down"); in amdgpu_ras_error_inject_xgmi()
1602 dev_warn(adev->dev, "Failed to allow df cstate"); in amdgpu_ras_error_inject_xgmi()
1625 if (!adev->gmc.xgmi.ras) in amdgpu_xgmi_ras_sw_init()
1628 ras = adev->gmc.xgmi.ras; in amdgpu_xgmi_ras_sw_init()
1629 err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); in amdgpu_xgmi_ras_sw_init()
1631 dev_err(adev->dev, "Failed to register xgmi_wafl_pcs ras block!\n"); in amdgpu_xgmi_ras_sw_init()
1635 strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl"); in amdgpu_xgmi_ras_sw_init()
1636 ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__XGMI_WAFL; in amdgpu_xgmi_ras_sw_init()
1637 ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; in amdgpu_xgmi_ras_sw_init()
1638 adev->gmc.xgmi.ras_if = &ras->ras_block.ras_comm; in amdgpu_xgmi_ras_sw_init()
1652 mutex_lock(&hive->hive_lock); in amdgpu_xgmi_reset_on_init_work()
1655 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) in amdgpu_xgmi_reset_on_init_work()
1656 list_add_tail(&tmp_adev->reset_list, &device_list); in amdgpu_xgmi_reset_on_init_work()
1660 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); in amdgpu_xgmi_reset_on_init_work()
1670 mutex_unlock(&hive->hive_lock); in amdgpu_xgmi_reset_on_init_work()
1671 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); in amdgpu_xgmi_reset_on_init_work()
1673 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { in amdgpu_xgmi_reset_on_init_work()
1675 if (r && r != -EHWPOISON) in amdgpu_xgmi_reset_on_init_work()
1676 dev_err(tmp_adev->dev, in amdgpu_xgmi_reset_on_init_work()
1683 INIT_WORK(&hive->reset_on_init_work, amdgpu_xgmi_reset_on_init_work); in amdgpu_xgmi_schedule_reset_on_init()
1684 amdgpu_reset_domain_schedule(hive->reset_domain, in amdgpu_xgmi_schedule_reset_on_init()
1685 &hive->reset_on_init_work); in amdgpu_xgmi_schedule_reset_on_init()
1696 return -EINVAL; in amdgpu_xgmi_reset_on_init()
1698 mutex_lock(&hive->hive_lock); in amdgpu_xgmi_reset_on_init()
1699 num_devs = atomic_read(&hive->number_devices); in amdgpu_xgmi_reset_on_init()
1701 if (num_devs == adev->gmc.xgmi.num_physical_nodes) { in amdgpu_xgmi_reset_on_init()
1706 mutex_unlock(&hive->hive_lock); in amdgpu_xgmi_reset_on_init()
1710 flush_work(&hive->reset_on_init_work); in amdgpu_xgmi_reset_on_init()
1728 mutex_lock(&hive->hive_lock); in amdgpu_xgmi_request_nps_change()
1729 if (atomic_read(&hive->requested_nps_mode) == in amdgpu_xgmi_request_nps_change()
1731 dev_dbg(adev->dev, "Unexpected entry for hive NPS change"); in amdgpu_xgmi_request_nps_change()
1732 mutex_unlock(&hive->hive_lock); in amdgpu_xgmi_request_nps_change()
1735 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { in amdgpu_xgmi_request_nps_change()
1736 r = adev->gmc.gmc_funcs->request_mem_partition_mode( in amdgpu_xgmi_request_nps_change()
1744 adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev); in amdgpu_xgmi_request_nps_change()
1746 tmp_adev, &hive->device_list, gmc.xgmi.head) in amdgpu_xgmi_request_nps_change()
1747 adev->gmc.gmc_funcs->request_mem_partition_mode( in amdgpu_xgmi_request_nps_change()
1751 atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE); in amdgpu_xgmi_request_nps_change()
1752 mutex_unlock(&hive->hive_lock); in amdgpu_xgmi_request_nps_change()
1761 adev->gmc.xgmi.hive_id && in amdgpu_xgmi_same_hive()
1762 adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); in amdgpu_xgmi_same_hive()
1767 if (!adev->gmc.xgmi.supported) in amdgpu_xgmi_early_init()
1775 adev->gmc.xgmi.max_speed = 25; in amdgpu_xgmi_early_init()
1776 adev->gmc.xgmi.max_width = 16; in amdgpu_xgmi_early_init()
1782 adev->gmc.xgmi.max_speed = 32; in amdgpu_xgmi_early_init()
1783 adev->gmc.xgmi.max_width = 16; in amdgpu_xgmi_early_init()
1793 adev->gmc.xgmi.max_speed = max_speed; in amgpu_xgmi_set_max_speed_width()
1794 adev->gmc.xgmi.max_width = max_width; in amgpu_xgmi_set_max_speed_width()