xref: /freebsd/sys/dev/qat/qat_common/adf_heartbeat.c (revision 71625ec9ad2a9bc8c09784fbd23b759830e0ee5f)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /* Copyright(c) 2007-2022 Intel Corporation */
3 #include <sys/types.h>
4 #include <linux/random.h>
5 #include "qat_freebsd.h"
6 
7 #include "adf_heartbeat.h"
8 #include "adf_common_drv.h"
9 #include "adf_cfg.h"
10 #include "adf_cfg_strings.h"
11 #include "icp_qat_fw_init_admin.h"
12 #include "adf_transport_internal.h"
13 
14 #define MAX_HB_TICKS 0xFFFFFFFF
15 
16 static int
adf_check_hb_poll_freq(struct adf_accel_dev * accel_dev)17 adf_check_hb_poll_freq(struct adf_accel_dev *accel_dev)
18 {
19 	u64 curr_hb_check_time = 0;
20 	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { 0 };
21 	unsigned int timer_val = ADF_CFG_HB_DEFAULT_VALUE;
22 
23 	curr_hb_check_time = adf_clock_get_current_time();
24 
25 	if (!adf_cfg_get_param_value(accel_dev,
26 				     ADF_GENERAL_SEC,
27 				     ADF_HEARTBEAT_TIMER,
28 				     (char *)timer_str)) {
29 		if (compat_strtouint((char *)timer_str,
30 				     ADF_CFG_BASE_DEC,
31 				     &timer_val))
32 			timer_val = ADF_CFG_HB_DEFAULT_VALUE;
33 	}
34 	if ((curr_hb_check_time - accel_dev->heartbeat->last_hb_check_time) <
35 	    timer_val) {
36 		return EINVAL;
37 	}
38 	accel_dev->heartbeat->last_hb_check_time = curr_hb_check_time;
39 
40 	return 0;
41 }
42 
43 int
adf_heartbeat_init(struct adf_accel_dev * accel_dev)44 adf_heartbeat_init(struct adf_accel_dev *accel_dev)
45 {
46 	if (accel_dev->heartbeat)
47 		adf_heartbeat_clean(accel_dev);
48 
49 	accel_dev->heartbeat =
50 	    malloc(sizeof(*accel_dev->heartbeat), M_QAT, M_WAITOK | M_ZERO);
51 
52 	return 0;
53 }
54 
55 void
adf_heartbeat_clean(struct adf_accel_dev * accel_dev)56 adf_heartbeat_clean(struct adf_accel_dev *accel_dev)
57 {
58 	free(accel_dev->heartbeat, M_QAT);
59 	accel_dev->heartbeat = NULL;
60 }
61 
62 int
adf_get_hb_timer(struct adf_accel_dev * accel_dev,unsigned int * value)63 adf_get_hb_timer(struct adf_accel_dev *accel_dev, unsigned int *value)
64 {
65 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
66 	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { 0 };
67 	unsigned int timer_val = ADF_CFG_HB_DEFAULT_VALUE;
68 	u32 clk_per_sec = 0;
69 
70 	/* HB clock may be different than AE clock */
71 	if (hw_data->get_hb_clock) {
72 		clk_per_sec = (u32)hw_data->get_hb_clock(hw_data);
73 	} else if (hw_data->get_ae_clock) {
74 		clk_per_sec = (u32)hw_data->get_ae_clock(hw_data);
75 	} else {
76 		return EINVAL;
77 	}
78 
79 	/* Get Heartbeat Timer value from the configuration */
80 	if (!adf_cfg_get_param_value(accel_dev,
81 				     ADF_GENERAL_SEC,
82 				     ADF_HEARTBEAT_TIMER,
83 				     (char *)timer_str)) {
84 		if (compat_strtouint((char *)timer_str,
85 				     ADF_CFG_BASE_DEC,
86 				     &timer_val))
87 			timer_val = ADF_CFG_HB_DEFAULT_VALUE;
88 	}
89 
90 	if (timer_val < ADF_MIN_HB_TIMER_MS) {
91 		device_printf(GET_DEV(accel_dev),
92 			      "%s value cannot be lesser than %u\n",
93 			      ADF_HEARTBEAT_TIMER,
94 			      ADF_MIN_HB_TIMER_MS);
95 		return EINVAL;
96 	}
97 
98 	/* Convert msec to clocks */
99 	clk_per_sec = clk_per_sec / 1000;
100 	*value = timer_val * clk_per_sec;
101 
102 	return 0;
103 }
104 
105 int
adf_get_heartbeat_status(struct adf_accel_dev * accel_dev)106 adf_get_heartbeat_status(struct adf_accel_dev *accel_dev)
107 {
108 	struct icp_qat_fw_init_admin_hb_cnt *live_s, *last_s, *curr_s;
109 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
110 	const size_t max_aes = hw_device->get_num_aes(hw_device);
111 	const size_t hb_ctrs = hw_device->heartbeat_ctr_num;
112 	const size_t stats_size =
113 	    max_aes * hb_ctrs * sizeof(struct icp_qat_fw_init_admin_hb_cnt);
114 	int ret = 0;
115 	size_t ae, thr;
116 	u16 *count_s;
117 	unsigned long ae_mask = 0;
118 
119 	/*
120 	 * Memory layout of Heartbeat
121 	 *
122 	 * +----------------+----------------+---------+
123 	 * |   Live value   |   Last value   |  Count  |
124 	 * +----------------+----------------+---------+
125 	 * \_______________/\_______________/\________/
126 	 *         ^                ^            ^
127 	 *         |                |            |
128 	 *         |                |            max_aes * hb_ctrs *
129 	 *         |                |            sizeof(u16)
130 	 *         |                |
131 	 *         |                max_aes * hb_ctrs *
132 	 *         |                sizeof(icp_qat_fw_init_admin_hb_cnt)
133 	 *         |
134 	 *         max_aes * hb_ctrs *
135 	 *         sizeof(icp_qat_fw_init_admin_hb_cnt)
136 	 */
137 	live_s = (struct icp_qat_fw_init_admin_hb_cnt *)
138 		     accel_dev->admin->virt_hb_addr;
139 	last_s = live_s + (max_aes * hb_ctrs);
140 	count_s = (u16 *)(last_s + (max_aes * hb_ctrs));
141 
142 	curr_s = malloc(stats_size, M_QAT, M_WAITOK | M_ZERO);
143 
144 	memcpy(curr_s, live_s, stats_size);
145 	ae_mask = hw_device->ae_mask;
146 
147 	for_each_set_bit(ae, &ae_mask, max_aes)
148 	{
149 		struct icp_qat_fw_init_admin_hb_cnt *curr =
150 		    curr_s + ae * hb_ctrs;
151 		struct icp_qat_fw_init_admin_hb_cnt *prev =
152 		    last_s + ae * hb_ctrs;
153 		u16 *count = count_s + ae * hb_ctrs;
154 
155 		for (thr = 0; thr < hb_ctrs; ++thr) {
156 			u16 req = curr[thr].req_heartbeat_cnt;
157 			u16 resp = curr[thr].resp_heartbeat_cnt;
158 			u16 last = prev[thr].resp_heartbeat_cnt;
159 
160 			if ((thr == ADF_AE_ADMIN_THREAD || req != resp) &&
161 			    resp == last) {
162 				u16 retry = ++count[thr];
163 
164 				if (retry >= ADF_CFG_HB_COUNT_THRESHOLD)
165 					ret = EIO;
166 			} else {
167 				count[thr] = 0;
168 			}
169 		}
170 	}
171 
172 	/* Copy current stats for the next iteration */
173 	memcpy(last_s, curr_s, stats_size);
174 	free(curr_s, M_QAT);
175 
176 	return ret;
177 }
178 
179 int
adf_heartbeat_status(struct adf_accel_dev * accel_dev,enum adf_device_heartbeat_status * hb_status)180 adf_heartbeat_status(struct adf_accel_dev *accel_dev,
181 		     enum adf_device_heartbeat_status *hb_status)
182 {
183 	/* Heartbeat is not implemented in VFs at the moment so they do not
184 	 * set get_heartbeat_status. Also, in case the device is not up,
185 	 * unsupported should be returned */
186 	if (!accel_dev || !accel_dev->hw_device ||
187 	    !accel_dev->hw_device->get_heartbeat_status ||
188 	    !accel_dev->heartbeat) {
189 		*hb_status = DEV_HB_UNSUPPORTED;
190 		return 0;
191 	}
192 
193 	if (!adf_dev_started(accel_dev) ||
194 	    test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) {
195 		*hb_status = DEV_HB_UNRESPONSIVE;
196 		accel_dev->heartbeat->last_hb_status = DEV_HB_UNRESPONSIVE;
197 		return 0;
198 	}
199 
200 	if (adf_check_hb_poll_freq(accel_dev) == EINVAL) {
201 		*hb_status = accel_dev->heartbeat->last_hb_status;
202 		return 0;
203 	}
204 
205 	accel_dev->heartbeat->hb_sent_counter++;
206 	if (unlikely(accel_dev->hw_device->get_heartbeat_status(accel_dev))) {
207 		device_printf(GET_DEV(accel_dev),
208 			      "ERROR: QAT is not responding.\n");
209 		*hb_status = DEV_HB_UNRESPONSIVE;
210 		accel_dev->heartbeat->last_hb_status = DEV_HB_UNRESPONSIVE;
211 		accel_dev->heartbeat->hb_failed_counter++;
212 		return adf_notify_fatal_error(accel_dev);
213 	}
214 
215 	*hb_status = DEV_HB_ALIVE;
216 	accel_dev->heartbeat->last_hb_status = DEV_HB_ALIVE;
217 
218 	return 0;
219 }
220