xref: /freebsd/sys/dev/qat/qat_common/adf_heartbeat.c (revision a2464ee12761660f50d0b6f59f233949ebcacc87)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /* Copyright(c) 2007-2022 Intel Corporation */
3 /* $FreeBSD$ */
4 #include <sys/types.h>
5 #include <linux/random.h>
6 #include "qat_freebsd.h"
7 
8 #include "adf_heartbeat.h"
9 #include "adf_common_drv.h"
10 #include "adf_cfg.h"
11 #include "adf_cfg_strings.h"
12 #include "icp_qat_fw_init_admin.h"
13 #include "adf_transport_internal.h"
14 
15 #define MAX_HB_TICKS 0xFFFFFFFF
16 
17 static int
18 adf_check_hb_poll_freq(struct adf_accel_dev *accel_dev)
19 {
20 	u64 curr_hb_check_time = 0;
21 	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { 0 };
22 	unsigned int timer_val = ADF_CFG_HB_DEFAULT_VALUE;
23 
24 	curr_hb_check_time = adf_clock_get_current_time();
25 
26 	if (!adf_cfg_get_param_value(accel_dev,
27 				     ADF_GENERAL_SEC,
28 				     ADF_HEARTBEAT_TIMER,
29 				     (char *)timer_str)) {
30 		if (compat_strtouint((char *)timer_str,
31 				     ADF_CFG_BASE_DEC,
32 				     &timer_val))
33 			timer_val = ADF_CFG_HB_DEFAULT_VALUE;
34 	}
35 	if ((curr_hb_check_time - accel_dev->heartbeat->last_hb_check_time) <
36 	    timer_val) {
37 		return EINVAL;
38 	}
39 	accel_dev->heartbeat->last_hb_check_time = curr_hb_check_time;
40 
41 	return 0;
42 }
43 
44 int
45 adf_heartbeat_init(struct adf_accel_dev *accel_dev)
46 {
47 	if (accel_dev->heartbeat)
48 		adf_heartbeat_clean(accel_dev);
49 
50 	accel_dev->heartbeat =
51 	    malloc(sizeof(*accel_dev->heartbeat), M_QAT, M_WAITOK | M_ZERO);
52 
53 	return 0;
54 }
55 
56 void
57 adf_heartbeat_clean(struct adf_accel_dev *accel_dev)
58 {
59 	free(accel_dev->heartbeat, M_QAT);
60 	accel_dev->heartbeat = NULL;
61 }
62 
63 int
64 adf_get_hb_timer(struct adf_accel_dev *accel_dev, unsigned int *value)
65 {
66 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
67 	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { 0 };
68 	unsigned int timer_val = ADF_CFG_HB_DEFAULT_VALUE;
69 	u32 clk_per_sec = 0;
70 
71 	if (!hw_data->get_ae_clock)
72 		return EINVAL;
73 
74 	clk_per_sec = (u32)hw_data->get_ae_clock(hw_data);
75 
76 	/* Get Heartbeat Timer value from the configuration */
77 	if (!adf_cfg_get_param_value(accel_dev,
78 				     ADF_GENERAL_SEC,
79 				     ADF_HEARTBEAT_TIMER,
80 				     (char *)timer_str)) {
81 		if (compat_strtouint((char *)timer_str,
82 				     ADF_CFG_BASE_DEC,
83 				     &timer_val))
84 			timer_val = ADF_CFG_HB_DEFAULT_VALUE;
85 	}
86 
87 	if (timer_val < ADF_MIN_HB_TIMER_MS) {
88 		device_printf(GET_DEV(accel_dev),
89 			      "%s value cannot be lesser than %u\n",
90 			      ADF_HEARTBEAT_TIMER,
91 			      ADF_MIN_HB_TIMER_MS);
92 		return EINVAL;
93 	}
94 
95 	/* Convert msec to clocks */
96 	clk_per_sec = clk_per_sec / 1000;
97 	*value = timer_val * clk_per_sec;
98 
99 	return 0;
100 }
101 
/*
 * Per-AE block of 16-bit counters, one slot for each of the
 * ADF_NUM_HB_CNT_PER_AE heartbeat counters of an acceleration engine.
 * adf_get_heartbeat_status() uses each slot to count consecutive polls in
 * which the corresponding thread appeared stalled, resetting it to zero
 * otherwise.
 */
struct adf_hb_count {
	u16 ae_thread[ADF_NUM_HB_CNT_PER_AE];
};
105 
106 int
107 adf_get_heartbeat_status(struct adf_accel_dev *accel_dev)
108 {
109 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
110 	struct icp_qat_fw_init_admin_hb_stats *live_s =
111 	    (struct icp_qat_fw_init_admin_hb_stats *)
112 		accel_dev->admin->virt_hb_addr;
113 	const size_t max_aes = hw_device->get_num_aes(hw_device);
114 	const size_t stats_size =
115 	    max_aes * sizeof(struct icp_qat_fw_init_admin_hb_stats);
116 	int ret = 0;
117 	size_t ae, thr;
118 	unsigned long ae_mask = 0;
119 	int num_threads_per_ae = ADF_NUM_HB_CNT_PER_AE;
120 
121 	/*
122 	 * Memory layout of Heartbeat
123 	 *
124 	 * +----------------+----------------+---------+
125 	 * |   Live value   |   Last value   |  Count  |
126 	 * +----------------+----------------+---------+
127 	 * \_______________/\_______________/\________/
128 	 *         ^                ^            ^
129 	 *         |                |            |
130 	 *         |                |            max_aes * sizeof(adf_hb_count)
131 	 *         |            max_aes * sizeof(icp_qat_fw_init_admin_hb_stats)
132 	 *         max_aes * sizeof(icp_qat_fw_init_admin_hb_stats)
133 	 */
134 	struct icp_qat_fw_init_admin_hb_stats *curr_s;
135 	struct icp_qat_fw_init_admin_hb_stats *last_s = live_s + max_aes;
136 	struct adf_hb_count *count = (struct adf_hb_count *)(last_s + max_aes);
137 
138 	curr_s = malloc(stats_size, M_QAT, M_WAITOK | M_ZERO);
139 
140 	memcpy(curr_s, live_s, stats_size);
141 	ae_mask = hw_device->ae_mask;
142 
143 	for_each_set_bit(ae, &ae_mask, max_aes)
144 	{
145 		for (thr = 0; thr < num_threads_per_ae; ++thr) {
146 			struct icp_qat_fw_init_admin_hb_cnt *curr =
147 			    &curr_s[ae].stats[thr];
148 			struct icp_qat_fw_init_admin_hb_cnt *prev =
149 			    &last_s[ae].stats[thr];
150 			u16 req = curr->req_heartbeat_cnt;
151 			u16 resp = curr->resp_heartbeat_cnt;
152 			u16 last = prev->resp_heartbeat_cnt;
153 
154 			if ((thr == ADF_AE_ADMIN_THREAD || req != resp) &&
155 			    resp == last) {
156 				u16 retry = ++count[ae].ae_thread[thr];
157 
158 				if (retry >= ADF_CFG_HB_COUNT_THRESHOLD)
159 					ret = EIO;
160 			} else {
161 				count[ae].ae_thread[thr] = 0;
162 			}
163 		}
164 	}
165 
166 	/* Copy current stats for the next iteration */
167 	memcpy(last_s, curr_s, stats_size);
168 	free(curr_s, M_QAT);
169 
170 	return ret;
171 }
172 
173 int
174 adf_heartbeat_status(struct adf_accel_dev *accel_dev,
175 		     enum adf_device_heartbeat_status *hb_status)
176 {
177 	/* Heartbeat is not implemented in VFs at the moment so they do not
178 	 * set get_heartbeat_status. Also, in case the device is not up,
179 	 * unsupported should be returned */
180 	if (!accel_dev || !accel_dev->hw_device ||
181 	    !accel_dev->hw_device->get_heartbeat_status ||
182 	    !accel_dev->heartbeat) {
183 		*hb_status = DEV_HB_UNSUPPORTED;
184 		return 0;
185 	}
186 
187 	if (!adf_dev_started(accel_dev) ||
188 	    test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) {
189 		*hb_status = DEV_HB_UNRESPONSIVE;
190 		accel_dev->heartbeat->last_hb_status = DEV_HB_UNRESPONSIVE;
191 		return 0;
192 	}
193 
194 	if (adf_check_hb_poll_freq(accel_dev) == EINVAL) {
195 		*hb_status = accel_dev->heartbeat->last_hb_status;
196 		return 0;
197 	}
198 
199 	accel_dev->heartbeat->hb_sent_counter++;
200 	if (unlikely(accel_dev->hw_device->get_heartbeat_status(accel_dev))) {
201 		device_printf(GET_DEV(accel_dev),
202 			      "ERROR: QAT is not responding.\n");
203 		*hb_status = DEV_HB_UNRESPONSIVE;
204 		accel_dev->heartbeat->last_hb_status = DEV_HB_UNRESPONSIVE;
205 		accel_dev->heartbeat->hb_failed_counter++;
206 		return adf_notify_fatal_error(accel_dev);
207 	}
208 
209 	*hb_status = DEV_HB_ALIVE;
210 	accel_dev->heartbeat->last_hb_status = DEV_HB_ALIVE;
211 
212 	return 0;
213 }
214