Lines matching +full:ch +full:- +full:func. Every hit falls in drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c; the matches are grouped by function below, one matched source line per row, so statements may break off where they continue on an unmatched line.

In amdgpu_umc_convert_error_address():

    dev_warn(adev->dev,

In amdgpu_umc_page_retirement_mca():

    kcalloc(adev->umc.max_ras_err_cnt_per_query,
    dev_warn(adev->dev,
    err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query;
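
The kcalloc() above is cut off at the matched line; given the err_addr_len assignment that follows it, it sizes an array of EEPROM table records. A hedged completion (the element type and GFP flags are assumptions, not verbatim source):

    /* assumed completion of the truncated kcalloc() above */
    err_data.err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query,
                                sizeof(struct eeprom_table_record), GFP_KERNEL);
    err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query;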

In amdgpu_umc_handle_bad_pages(), the SMU-first query path (falling back to reading UMC registers when amdgpu_dpm_get_ecc_info() is unsupported):

    mutex_lock(&con->page_retirement_lock);
    ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
    if (ret == -EOPNOTSUPP &&
        if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
            adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
            adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
        if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
            adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
            adev->umc.max_ras_err_cnt_per_query) {
            err_data->err_addr =
                kcalloc(adev->umc.max_ras_err_cnt_per_query,
            if (!err_data->err_addr)
                dev_warn(adev->dev, "Failed to alloc memory for "
            err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
            adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
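
Both hw_ops callbacks probed above are invoked as fn(adev, ras_error_status), which matches this subset of struct amdgpu_ras_block_hw_ops (a sketch of just the two relevant members, not the full kernel definition):

    struct amdgpu_ras_block_hw_ops {
        /* fill the ce/ue/de counters behind ras_error_status */
        void (*query_ras_error_count)(struct amdgpu_device *adev,
                                      void *ras_error_status);
        /* fill err_data->err_addr with the faulting addresses */
        void (*query_ras_error_address)(struct amdgpu_device *adev,
                                        void *ras_error_status);
        /* ... remaining members elided ... */
    };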

Still in amdgpu_umc_handle_bad_pages(): the SMU ECC-table path, same shape but driven by the ecc_info_* callbacks:

    if (adev->umc.ras &&
        adev->umc.ras->ecc_info_query_ras_error_count)
        adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
    if (adev->umc.ras &&
        adev->umc.ras->ecc_info_query_ras_error_address &&
        adev->umc.max_ras_err_cnt_per_query) {
        err_data->err_addr =
            kcalloc(adev->umc.max_ras_err_cnt_per_query,
        if (!err_data->err_addr)
            dev_warn(adev->dev, "Failed to alloc memory for "
        err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
        adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status);

Retirement and persistence at the tail of amdgpu_umc_handle_bad_pages():

    if (err_data->ue_count || err_data->de_count) {
        err_count = err_data->ue_count + err_data->de_count;
        err_data->err_addr_cnt) {
            amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
                                     err_data->err_addr_cnt);
        amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
        if (con->update_channel_flag) {
            amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
            con->update_channel_flag = false;
    kfree(err_data->err_addr);
    err_data->err_addr = NULL;
    mutex_unlock(&con->page_retirement_lock);

In amdgpu_umc_do_page_retirement():

    kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
    if ((err_data->ue_count || err_data->de_count) &&
        con->gpu_reset_flags |= reset;
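
The reset value ORed into con->gpu_reset_flags is a RAS reset-mode bitmask from amdgpu_ras.h. A hypothetical call shape, assuming a caller wants a mode-2 reset once retirement is queued (the call site is illustrative, not taken from the file):

    /* illustrative only: request a mode-2 GPU reset after retirement */
    amdgpu_umc_do_page_retirement(adev, ras_error_status, entry,
                                  AMDGPU_RAS_GPU_RESET_MODE2_RESET);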

In amdgpu_umc_pasid_poison_handler():

    if (adev->gmc.xgmi.connected_to_cpu ||
        adev->gmc.is_app_apu) {
    kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
    obj->err_data.ue_count += err_data.ue_count;
    obj->err_data.ce_count += err_data.ce_count;
    obj->err_data.de_count += err_data.de_count;
    atomic_inc(&con->page_retirement_req_cnt);
    wake_up(&con->page_retirement_wq);
    if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
        adev->virt.ops->ras_poison_handler(adev, block);
    dev_warn(adev->dev,
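
Pieced together, the matches suggest two paths: bare metal accumulates the new counts on the RAS object and wakes the page-retirement worker, while an SR-IOV guest defers to the host. A condensed sketch of that split (the amdgpu_sriov_vf() test is an assumption inferred from the virt.ops fallback):

    if (!amdgpu_sriov_vf(adev)) {
        /* bare metal: fold counts into obj->err_data, then queue work */
        atomic_inc(&con->page_retirement_req_cnt);
        wake_up(&con->page_retirement_wq);
    } else if (adev->virt.ops && adev->virt.ops->ras_poison_handler) {
        /* guest: let the host handle the poison event */
        adev->virt.ops->ras_poison_handler(adev, block);
    }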

In amdgpu_umc_ras_sw_init():

    if (!adev->umc.ras)
    ras = adev->umc.ras;
    err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
        dev_err(adev->dev, "Failed to register umc ras block!\n");
    strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
    ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
    ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
    adev->umc.ras_if = &ras->ras_block.ras_comm;
    if (!ras->ras_block.ras_late_init)
        ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
    if (!ras->ras_block.ras_cb)
        ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
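
For those defaults to matter, the per-version UMC code has to publish its amdgpu_umc_ras instance on adev->umc.ras before sw_init runs. A hypothetical wiring sketch (umc_vX_Y_ras is a placeholder name, not a real symbol):

    /* hypothetical IP-version file: any ras_late_init/ras_cb left NULL
     * here is filled in with the defaults above */
    static struct amdgpu_umc_ras umc_vX_Y_ras = {
        /* version-specific query/init callbacks set here */
    };

    adev->umc.ras = &umc_vX_Y_ras;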

In amdgpu_umc_ras_late_init():

    if (amdgpu_ras_is_supported(adev, ras_block->block)) {
        r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
    if (adev->umc.ras &&
        adev->umc.ras->err_cnt_init)
        adev->umc.ras->err_cnt_init(adev);

In amdgpu_umc_process_ecc_irq():

    struct ras_common_if *ras_if = adev->umc.ras_if;

In amdgpu_umc_fill_error_record():

        !err_data->err_addr ||
        (err_data->err_addr_cnt >= err_data->err_addr_len))
        return -EINVAL;
    err_rec = &err_data->err_addr[err_data->err_addr_cnt];
    err_rec->address = err_addr;
    /* retired_page is a byte address; store the page frame number */
    err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
    err_rec->ts = (uint64_t)ktime_get_real_seconds();
    err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
    err_rec->cu = 0;
    err_rec->mem_channel = channel_index;
    err_rec->mcumc_id = umc_inst;
    err_data->err_addr_cnt++;
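
A hedged sketch of a call site, with the parameter order inferred from the assignments above (the local names are illustrative):

    /* illustrative call: append one bad-page record, or get -EINVAL when
     * the err_addr array is missing or already full */
    ret = amdgpu_umc_fill_error_record(err_data, err_addr, retired_page,
                                       channel_index, umc_inst);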

In amdgpu_umc_loop_channels(adev, umc_func func, void *data):

    if (adev->umc.node_inst_num) {
        ret = func(adev, node_inst, umc_inst, ch_inst, data);
            dev_err(adev->dev, "Node %d umc %d ch %d func returns %d\n",
    ret = func(adev, 0, umc_inst, ch_inst, data);
        dev_err(adev->dev, "Umc %d ch %d func returns %d\n",
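
The callback prototype can be read off the two call sites above. A minimal conforming callback (the name, body, and uint32_t instance types are assumptions):

    /* hypothetical umc_func: count every channel instance visited */
    static int umc_count_channels(struct amdgpu_device *adev,
                                  uint32_t node_inst, uint32_t umc_inst,
                                  uint32_t ch_inst, void *data)
    {
        (*(uint32_t *)data)++;
        return 0;   /* nonzero is reported via the dev_err() above */
    }

    uint32_t visited = 0;
    amdgpu_umc_loop_channels(adev, umc_count_channels, &visited);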

In amdgpu_umc_update_ecc_status():

    if (adev->umc.ras->update_ecc_status)
        return adev->umc.ras->update_ecc_status(adev,

In amdgpu_umc_logs_ecc_err():

    ecc_log = &con->umc_ecc_log;
    mutex_lock(&ecc_log->lock);
    ret = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err);
        ecc_err->pa_pfn, UMC_ECC_NEW_DETECTED_TAG);
    mutex_unlock(&ecc_log->lock);
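
The dangling arguments above are presumably the tail of a radix_tree_tag_set() call that flags the fresh entry. A hedged sketch of the insert-and-tag pattern, plus a consumer draining tagged entries (the gang lookup is an assumption, not shown in the matches):

    /* producer: index the error by PA page frame and tag it as new */
    ret = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err);
    if (!ret)
        radix_tree_tag_set(ecc_tree, ecc_err->pa_pfn,
                           UMC_ECC_NEW_DETECTED_TAG);

    /* hypothetical consumer: batch-collect entries still carrying the tag */
    void *entries[16];
    unsigned int n = radix_tree_gang_lookup_tag(ecc_tree, entries, 0, 16,
                                                UMC_ECC_NEW_DETECTED_TAG);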