1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright(c) 2023 Intel Corporation */ 3 4 #include <linux/dev_printk.h> 5 #include <linux/dma-mapping.h> 6 #include <linux/export.h> 7 #include <linux/kernel.h> 8 #include <linux/kstrtox.h> 9 #include <linux/overflow.h> 10 #include <linux/string.h> 11 #include <linux/slab.h> 12 #include <linux/types.h> 13 #include <asm/errno.h> 14 #include "adf_accel_devices.h" 15 #include "adf_admin.h" 16 #include "adf_cfg.h" 17 #include "adf_cfg_strings.h" 18 #include "adf_clock.h" 19 #include "adf_common_drv.h" 20 #include "adf_heartbeat.h" 21 #include "adf_transport_internal.h" 22 #include "icp_qat_fw_init_admin.h" 23 24 #define ADF_HB_EMPTY_SIG 0xA5A5A5A5 25 26 static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev) 27 { 28 u64 curr_time = adf_clock_get_current_time(); 29 u64 polling_time = curr_time - accel_dev->heartbeat->last_hb_check_time; 30 31 if (polling_time < accel_dev->heartbeat->hb_timer) { 32 dev_warn(&GET_DEV(accel_dev), 33 "HB polling too frequent. Configured HB timer %d ms\n", 34 accel_dev->heartbeat->hb_timer); 35 return -EINVAL; 36 } 37 38 accel_dev->heartbeat->last_hb_check_time = curr_time; 39 return 0; 40 } 41 42 /** 43 * validate_hb_ctrs_cnt() - checks if the number of heartbeat counters should 44 * be updated by one to support the currently loaded firmware. 45 * @accel_dev: Pointer to acceleration device. 46 * 47 * Return: 48 * * true - hb_ctrs must increased by ADF_NUM_PKE_STRAND 49 * * false - no changes needed 50 */ 51 static bool validate_hb_ctrs_cnt(struct adf_accel_dev *accel_dev) 52 { 53 const size_t hb_ctrs = accel_dev->hw_device->num_hb_ctrs; 54 const size_t max_aes = accel_dev->hw_device->num_engines; 55 const size_t hb_struct_size = sizeof(struct hb_cnt_pair); 56 const size_t exp_diff_size = array3_size(ADF_NUM_PKE_STRAND, max_aes, 57 hb_struct_size); 58 const size_t dev_ctrs = size_mul(max_aes, hb_ctrs); 59 const size_t stats_size = size_mul(dev_ctrs, hb_struct_size); 60 const u32 exp_diff_cnt = exp_diff_size / sizeof(u32); 61 const u32 stats_el_cnt = stats_size / sizeof(u32); 62 struct hb_cnt_pair *hb_stats = accel_dev->heartbeat->dma.virt_addr; 63 const u32 *mem_to_chk = (u32 *)(hb_stats + dev_ctrs); 64 u32 el_diff_cnt = 0; 65 int i; 66 67 /* count how many bytes are different from pattern */ 68 for (i = 0; i < stats_el_cnt; i++) { 69 if (mem_to_chk[i] == ADF_HB_EMPTY_SIG) 70 break; 71 72 el_diff_cnt++; 73 } 74 75 return el_diff_cnt && el_diff_cnt == exp_diff_cnt; 76 } 77 78 void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev) 79 { 80 struct hb_cnt_pair *hb_stats = accel_dev->heartbeat->dma.virt_addr; 81 const size_t hb_ctrs = accel_dev->hw_device->num_hb_ctrs; 82 const size_t max_aes = accel_dev->hw_device->num_engines; 83 const size_t dev_ctrs = size_mul(max_aes, hb_ctrs); 84 const size_t stats_size = size_mul(dev_ctrs, sizeof(struct hb_cnt_pair)); 85 const size_t mem_items_to_fill = size_mul(stats_size, 2) / sizeof(u32); 86 87 /* fill hb stats memory with pattern */ 88 memset32((uint32_t *)hb_stats, ADF_HB_EMPTY_SIG, mem_items_to_fill); 89 accel_dev->heartbeat->ctrs_cnt_checked = false; 90 } 91 EXPORT_SYMBOL_GPL(adf_heartbeat_check_ctrs); 92 93 static int get_timer_ticks(struct adf_accel_dev *accel_dev, unsigned int *value) 94 { 95 char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { }; 96 u32 timer_ms = ADF_CFG_HB_TIMER_DEFAULT_MS; 97 int cfg_read_status; 98 u32 ticks; 99 int ret; 100 101 cfg_read_status = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC, 102 ADF_HEARTBEAT_TIMER, timer_str); 103 if (cfg_read_status == 0) { 104 if (kstrtouint(timer_str, 10, &timer_ms)) 105 dev_dbg(&GET_DEV(accel_dev), 106 "kstrtouint failed to parse the %s, param value", 107 ADF_HEARTBEAT_TIMER); 108 } 109 110 if (timer_ms < ADF_CFG_HB_TIMER_MIN_MS) { 111 dev_err(&GET_DEV(accel_dev), "Timer cannot be less than %u\n", 112 ADF_CFG_HB_TIMER_MIN_MS); 113 return -EINVAL; 114 } 115 116 /* 117 * On 4xxx devices adf_timer is responsible for HB updates and 118 * its period is fixed to 200ms 119 */ 120 if (accel_dev->timer) 121 timer_ms = ADF_CFG_HB_TIMER_MIN_MS; 122 123 ret = adf_heartbeat_ms_to_ticks(accel_dev, timer_ms, &ticks); 124 if (ret) 125 return ret; 126 127 adf_heartbeat_save_cfg_param(accel_dev, timer_ms); 128 129 accel_dev->heartbeat->hb_timer = timer_ms; 130 *value = ticks; 131 132 return 0; 133 } 134 135 static int check_ae(struct hb_cnt_pair *curr, struct hb_cnt_pair *prev, 136 u16 *count, const size_t hb_ctrs) 137 { 138 size_t thr; 139 140 /* loop through all threads in AE */ 141 for (thr = 0; thr < hb_ctrs; thr++) { 142 u16 req = curr[thr].req_heartbeat_cnt; 143 u16 resp = curr[thr].resp_heartbeat_cnt; 144 u16 last = prev[thr].resp_heartbeat_cnt; 145 146 if ((thr == ADF_AE_ADMIN_THREAD || req != resp) && resp == last) { 147 u16 retry = ++count[thr]; 148 149 if (retry >= ADF_CFG_HB_COUNT_THRESHOLD) 150 return -EIO; 151 152 } else { 153 count[thr] = 0; 154 } 155 } 156 return 0; 157 } 158 159 static int adf_hb_get_status(struct adf_accel_dev *accel_dev) 160 { 161 struct adf_hw_device_data *hw_device = accel_dev->hw_device; 162 struct hb_cnt_pair *live_stats, *last_stats, *curr_stats; 163 const size_t hb_ctrs = hw_device->num_hb_ctrs; 164 const unsigned long ae_mask = hw_device->ae_mask; 165 const size_t max_aes = hw_device->num_engines; 166 const size_t dev_ctrs = size_mul(max_aes, hb_ctrs); 167 const size_t stats_size = size_mul(dev_ctrs, sizeof(*curr_stats)); 168 struct hb_cnt_pair *ae_curr_p, *ae_prev_p; 169 u16 *count_fails, *ae_count_p; 170 size_t ae_offset; 171 size_t ae = 0; 172 int ret = 0; 173 174 if (!accel_dev->heartbeat->ctrs_cnt_checked) { 175 if (validate_hb_ctrs_cnt(accel_dev)) 176 hw_device->num_hb_ctrs += ADF_NUM_PKE_STRAND; 177 178 accel_dev->heartbeat->ctrs_cnt_checked = true; 179 } 180 181 live_stats = accel_dev->heartbeat->dma.virt_addr; 182 last_stats = live_stats + dev_ctrs; 183 count_fails = (u16 *)(last_stats + dev_ctrs); 184 185 curr_stats = kmemdup(live_stats, stats_size, GFP_KERNEL); 186 if (!curr_stats) 187 return -ENOMEM; 188 189 /* loop through active AEs */ 190 for_each_set_bit(ae, &ae_mask, max_aes) { 191 ae_offset = size_mul(ae, hb_ctrs); 192 ae_curr_p = curr_stats + ae_offset; 193 ae_prev_p = last_stats + ae_offset; 194 ae_count_p = count_fails + ae_offset; 195 196 ret = check_ae(ae_curr_p, ae_prev_p, ae_count_p, hb_ctrs); 197 if (ret) 198 break; 199 } 200 201 /* Copy current stats for the next iteration */ 202 memcpy(last_stats, curr_stats, stats_size); 203 kfree(curr_stats); 204 205 return ret; 206 } 207 208 static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev) 209 { 210 u64 curr_time = adf_clock_get_current_time(); 211 u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time; 212 213 if (time_since_reset < ADF_CFG_HB_RESET_MS) 214 return; 215 216 accel_dev->heartbeat->last_hb_reset_time = curr_time; 217 if (adf_notify_fatal_error(accel_dev)) 218 dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n"); 219 } 220 221 void adf_heartbeat_status(struct adf_accel_dev *accel_dev, 222 enum adf_device_heartbeat_status *hb_status) 223 { 224 struct adf_heartbeat *hb; 225 226 if (!adf_dev_started(accel_dev) || 227 test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) { 228 *hb_status = HB_DEV_UNRESPONSIVE; 229 return; 230 } 231 232 if (adf_hb_check_polling_freq(accel_dev) == -EINVAL) { 233 *hb_status = HB_DEV_UNSUPPORTED; 234 return; 235 } 236 237 hb = accel_dev->heartbeat; 238 hb->hb_sent_counter++; 239 240 if (adf_hb_get_status(accel_dev)) { 241 dev_err(&GET_DEV(accel_dev), 242 "Heartbeat ERROR: QAT is not responding.\n"); 243 *hb_status = HB_DEV_UNRESPONSIVE; 244 hb->hb_failed_counter++; 245 adf_heartbeat_reset(accel_dev); 246 return; 247 } 248 249 *hb_status = HB_DEV_ALIVE; 250 } 251 252 int adf_heartbeat_ms_to_ticks(struct adf_accel_dev *accel_dev, unsigned int time_ms, 253 u32 *value) 254 { 255 struct adf_hw_device_data *hw_data = accel_dev->hw_device; 256 u32 clk_per_sec; 257 258 /* HB clock may be different than AE clock */ 259 if (!hw_data->get_hb_clock) 260 return -EINVAL; 261 262 clk_per_sec = hw_data->get_hb_clock(hw_data); 263 *value = time_ms * (clk_per_sec / MSEC_PER_SEC); 264 265 return 0; 266 } 267 268 int adf_heartbeat_save_cfg_param(struct adf_accel_dev *accel_dev, 269 unsigned int timer_ms) 270 { 271 char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; 272 273 snprintf(timer_str, sizeof(timer_str), "%u", timer_ms); 274 return adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC, 275 ADF_HEARTBEAT_TIMER, timer_str, 276 ADF_STR); 277 } 278 EXPORT_SYMBOL_GPL(adf_heartbeat_save_cfg_param); 279 280 int adf_heartbeat_init(struct adf_accel_dev *accel_dev) 281 { 282 struct adf_heartbeat *hb; 283 284 hb = kzalloc(sizeof(*hb), GFP_KERNEL); 285 if (!hb) 286 goto err_ret; 287 288 hb->dma.virt_addr = dma_alloc_coherent(&GET_DEV(accel_dev), PAGE_SIZE, 289 &hb->dma.phy_addr, GFP_KERNEL); 290 if (!hb->dma.virt_addr) 291 goto err_free; 292 293 /* 294 * Default set this flag as true to avoid unnecessary checks, 295 * it will be reset on platforms that need such a check 296 */ 297 hb->ctrs_cnt_checked = true; 298 accel_dev->heartbeat = hb; 299 300 return 0; 301 302 err_free: 303 kfree(hb); 304 err_ret: 305 return -ENOMEM; 306 } 307 308 int adf_heartbeat_start(struct adf_accel_dev *accel_dev) 309 { 310 unsigned int timer_ticks; 311 int ret; 312 313 if (!accel_dev->heartbeat) { 314 dev_warn(&GET_DEV(accel_dev), "Heartbeat instance not found!"); 315 return -EFAULT; 316 } 317 318 if (accel_dev->hw_device->check_hb_ctrs) 319 accel_dev->hw_device->check_hb_ctrs(accel_dev); 320 321 ret = get_timer_ticks(accel_dev, &timer_ticks); 322 if (ret) 323 return ret; 324 325 ret = adf_send_admin_hb_timer(accel_dev, timer_ticks); 326 if (ret) 327 dev_warn(&GET_DEV(accel_dev), "Heartbeat not supported!"); 328 329 return ret; 330 } 331 332 void adf_heartbeat_shutdown(struct adf_accel_dev *accel_dev) 333 { 334 struct adf_heartbeat *hb = accel_dev->heartbeat; 335 336 if (!hb) 337 return; 338 339 if (hb->dma.virt_addr) 340 dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE, 341 hb->dma.virt_addr, hb->dma.phy_addr); 342 343 kfree(hb); 344 accel_dev->heartbeat = NULL; 345 } 346