xref: /freebsd/sys/dev/qat/qat_common/adf_heartbeat.c (revision 035dd78d30ba28a3dc15c05ec85ad10127165677)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /* Copyright(c) 2007-2022 Intel Corporation */
3 /* $FreeBSD$ */
4 #include <sys/types.h>
5 #include <linux/random.h>
6 #include "qat_freebsd.h"
7 
8 #include "adf_heartbeat.h"
9 #include "adf_common_drv.h"
10 #include "adf_cfg.h"
11 #include "adf_cfg_strings.h"
12 #include "icp_qat_fw_init_admin.h"
13 #include "adf_transport_internal.h"
14 
/*
 * Largest value representable in 32 unsigned bits.
 * NOTE(review): presumably the upper bound for a heartbeat interval
 * expressed in clock ticks; nothing in the visible part of this file
 * references it -- confirm usage before relying on or removing it.
 */
#define MAX_HB_TICKS 0xFFFFFFFF
16 
17 static int
18 adf_check_hb_poll_freq(struct adf_accel_dev *accel_dev)
19 {
20 	u64 curr_hb_check_time = 0;
21 	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { 0 };
22 	unsigned int timer_val = ADF_CFG_HB_DEFAULT_VALUE;
23 
24 	curr_hb_check_time = adf_clock_get_current_time();
25 
26 	if (!adf_cfg_get_param_value(accel_dev,
27 				     ADF_GENERAL_SEC,
28 				     ADF_HEARTBEAT_TIMER,
29 				     (char *)timer_str)) {
30 		if (compat_strtouint((char *)timer_str,
31 				     ADF_CFG_BASE_DEC,
32 				     &timer_val))
33 			timer_val = ADF_CFG_HB_DEFAULT_VALUE;
34 	}
35 	if ((curr_hb_check_time - accel_dev->heartbeat->last_hb_check_time) <
36 	    timer_val) {
37 		return EINVAL;
38 	}
39 	accel_dev->heartbeat->last_hb_check_time = curr_hb_check_time;
40 
41 	return 0;
42 }
43 
44 int
45 adf_heartbeat_init(struct adf_accel_dev *accel_dev)
46 {
47 	if (accel_dev->heartbeat)
48 		adf_heartbeat_clean(accel_dev);
49 
50 	accel_dev->heartbeat =
51 	    malloc(sizeof(*accel_dev->heartbeat), M_QAT, M_WAITOK | M_ZERO);
52 
53 	return 0;
54 }
55 
56 void
57 adf_heartbeat_clean(struct adf_accel_dev *accel_dev)
58 {
59 	free(accel_dev->heartbeat, M_QAT);
60 	accel_dev->heartbeat = NULL;
61 }
62 
63 int
64 adf_get_hb_timer(struct adf_accel_dev *accel_dev, unsigned int *value)
65 {
66 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
67 	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { 0 };
68 	unsigned int timer_val = ADF_CFG_HB_DEFAULT_VALUE;
69 	u32 clk_per_sec = 0;
70 
71 	/* HB clock may be different than AE clock */
72 	if (hw_data->get_hb_clock) {
73 		clk_per_sec = (u32)hw_data->get_hb_clock(hw_data);
74 	} else if (hw_data->get_ae_clock) {
75 		clk_per_sec = (u32)hw_data->get_ae_clock(hw_data);
76 	} else {
77 		return EINVAL;
78 	}
79 
80 	/* Get Heartbeat Timer value from the configuration */
81 	if (!adf_cfg_get_param_value(accel_dev,
82 				     ADF_GENERAL_SEC,
83 				     ADF_HEARTBEAT_TIMER,
84 				     (char *)timer_str)) {
85 		if (compat_strtouint((char *)timer_str,
86 				     ADF_CFG_BASE_DEC,
87 				     &timer_val))
88 			timer_val = ADF_CFG_HB_DEFAULT_VALUE;
89 	}
90 
91 	if (timer_val < ADF_MIN_HB_TIMER_MS) {
92 		device_printf(GET_DEV(accel_dev),
93 			      "%s value cannot be lesser than %u\n",
94 			      ADF_HEARTBEAT_TIMER,
95 			      ADF_MIN_HB_TIMER_MS);
96 		return EINVAL;
97 	}
98 
99 	/* Convert msec to clocks */
100 	clk_per_sec = clk_per_sec / 1000;
101 	*value = timer_val * clk_per_sec;
102 
103 	return 0;
104 }
105 
106 int
107 adf_get_heartbeat_status(struct adf_accel_dev *accel_dev)
108 {
109 	struct icp_qat_fw_init_admin_hb_cnt *live_s, *last_s, *curr_s;
110 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
111 	const size_t max_aes = hw_device->get_num_aes(hw_device);
112 	const size_t hb_ctrs = hw_device->heartbeat_ctr_num;
113 	const size_t stats_size =
114 	    max_aes * hb_ctrs * sizeof(struct icp_qat_fw_init_admin_hb_cnt);
115 	int ret = 0;
116 	size_t ae, thr;
117 	u16 *count_s;
118 	unsigned long ae_mask = 0;
119 
120 	/*
121 	 * Memory layout of Heartbeat
122 	 *
123 	 * +----------------+----------------+---------+
124 	 * |   Live value   |   Last value   |  Count  |
125 	 * +----------------+----------------+---------+
126 	 * \_______________/\_______________/\________/
127 	 *         ^                ^            ^
128 	 *         |                |            |
129 	 *         |                |            max_aes * hb_ctrs *
130 	 *         |                |            sizeof(u16)
131 	 *         |                |
132 	 *         |                max_aes * hb_ctrs *
133 	 *         |                sizeof(icp_qat_fw_init_admin_hb_cnt)
134 	 *         |
135 	 *         max_aes * hb_ctrs *
136 	 *         sizeof(icp_qat_fw_init_admin_hb_cnt)
137 	 */
138 	live_s = (struct icp_qat_fw_init_admin_hb_cnt *)
139 		     accel_dev->admin->virt_hb_addr;
140 	last_s = live_s + (max_aes * hb_ctrs);
141 	count_s = (u16 *)(last_s + (max_aes * hb_ctrs));
142 
143 	curr_s = malloc(stats_size, M_QAT, M_WAITOK | M_ZERO);
144 
145 	memcpy(curr_s, live_s, stats_size);
146 	ae_mask = hw_device->ae_mask;
147 
148 	for_each_set_bit(ae, &ae_mask, max_aes)
149 	{
150 		struct icp_qat_fw_init_admin_hb_cnt *curr =
151 		    curr_s + ae * hb_ctrs;
152 		struct icp_qat_fw_init_admin_hb_cnt *prev =
153 		    last_s + ae * hb_ctrs;
154 		u16 *count = count_s + ae * hb_ctrs;
155 
156 		for (thr = 0; thr < hb_ctrs; ++thr) {
157 			u16 req = curr[thr].req_heartbeat_cnt;
158 			u16 resp = curr[thr].resp_heartbeat_cnt;
159 			u16 last = prev[thr].resp_heartbeat_cnt;
160 
161 			if ((thr == ADF_AE_ADMIN_THREAD || req != resp) &&
162 			    resp == last) {
163 				u16 retry = ++count[thr];
164 
165 				if (retry >= ADF_CFG_HB_COUNT_THRESHOLD)
166 					ret = EIO;
167 			} else {
168 				count[thr] = 0;
169 			}
170 		}
171 	}
172 
173 	/* Copy current stats for the next iteration */
174 	memcpy(last_s, curr_s, stats_size);
175 	free(curr_s, M_QAT);
176 
177 	return ret;
178 }
179 
180 int
181 adf_heartbeat_status(struct adf_accel_dev *accel_dev,
182 		     enum adf_device_heartbeat_status *hb_status)
183 {
184 	/* Heartbeat is not implemented in VFs at the moment so they do not
185 	 * set get_heartbeat_status. Also, in case the device is not up,
186 	 * unsupported should be returned */
187 	if (!accel_dev || !accel_dev->hw_device ||
188 	    !accel_dev->hw_device->get_heartbeat_status ||
189 	    !accel_dev->heartbeat) {
190 		*hb_status = DEV_HB_UNSUPPORTED;
191 		return 0;
192 	}
193 
194 	if (!adf_dev_started(accel_dev) ||
195 	    test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) {
196 		*hb_status = DEV_HB_UNRESPONSIVE;
197 		accel_dev->heartbeat->last_hb_status = DEV_HB_UNRESPONSIVE;
198 		return 0;
199 	}
200 
201 	if (adf_check_hb_poll_freq(accel_dev) == EINVAL) {
202 		*hb_status = accel_dev->heartbeat->last_hb_status;
203 		return 0;
204 	}
205 
206 	accel_dev->heartbeat->hb_sent_counter++;
207 	if (unlikely(accel_dev->hw_device->get_heartbeat_status(accel_dev))) {
208 		device_printf(GET_DEV(accel_dev),
209 			      "ERROR: QAT is not responding.\n");
210 		*hb_status = DEV_HB_UNRESPONSIVE;
211 		accel_dev->heartbeat->last_hb_status = DEV_HB_UNRESPONSIVE;
212 		accel_dev->heartbeat->hb_failed_counter++;
213 		return adf_notify_fatal_error(accel_dev);
214 	}
215 
216 	*hb_status = DEV_HB_ALIVE;
217 	accel_dev->heartbeat->last_hb_status = DEV_HB_ALIVE;
218 
219 	return 0;
220 }
221