xref: /linux/drivers/infiniband/hw/hfi1/opfn.c (revision b60a5b8dcf49af9f2c60ae82e0383ee8e62a9a52)
1 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2 /*
3  * Copyright(c) 2018 Intel Corporation.
4  *
5  */
6 #include "hfi.h"
7 #include "trace.h"
8 #include "qp.h"
9 #include "opfn.h"
10 
/* Mask that hfi1_opfn_extended() applies to its bth1 argument. */
11 #define IB_BTHE_E                 BIT(IB_BTHE_E_SHIFT)
12 
/*
 * OPFN_CODE() maps a capability code to its bit in the opfn.requested and
 * opfn.completed masks: code N occupies bit N - 1, so the code passed in must
 * be at least 1. OPFN_MASK() does the same for a STL_VERBS_EXTD_* name suffix.
 */
13 #define OPFN_CODE(code) BIT((code) - 1)
14 #define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
15 
/*
 * Callbacks implementing one OPFN capability. Any of them may be NULL; every
 * call site in this file checks the pointer before invoking it.
 */
16 struct hfi1_opfn_type {
	/*
	 * Called by opfn_conn_request() to fill *data with the request
	 * payload. A false return makes the caller mark the capability
	 * completed without sending anything.
	 */
17 	bool (*request)(struct rvt_qp *qp, u64 *data);
	/*
	 * Called by opfn_conn_response() with the received payload in *data.
	 * A true return marks the capability completed; *data, with its low
	 * four bits replaced by the capability code, becomes the atomic reply.
	 */
18 	bool (*response)(struct rvt_qp *qp, u64 *data);
	/*
	 * Called by opfn_conn_reply() with the reply payload. A true return
	 * marks the capability completed.
	 */
19 	bool (*reply)(struct rvt_qp *qp, u64 data);
	/*
	 * Called when already negotiated state has to be dropped: from
	 * opfn_conn_error(), and from opfn_conn_response() when a request
	 * arrives for a capability that is already marked completed.
	 */
20 	void (*error)(struct rvt_qp *qp);
21 };
22 
/*
 * Handler table indexed by capability code. Only TID RDMA is populated here;
 * the remaining entries are zero-initialized, so their callbacks are NULL.
 */
23 static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
24 	[STL_VERBS_EXTD_TID_RDMA] = {
25 		.request = tid_rdma_conn_req,
26 		.response = tid_rdma_conn_resp,
27 		.reply = tid_rdma_conn_reply,
28 		.error = tid_rdma_conn_error,
29 	},
30 };
31 
/*
 * Workqueue on which opfn_schedule_conn_request() queues OPFN work. It is
 * allocated in opfn_init() and destroyed in opfn_exit().
 */
32 static struct workqueue_struct *opfn_wq;
33 
/*
 * Defined further down; declared here because opfn_conn_request() calls it
 * when ib_post_send() fails.
 */
34 static void opfn_schedule_conn_request(struct rvt_qp *qp);
35 
36 static bool hfi1_opfn_extended(u32 bth1)
37 {
38 	return !!(bth1 & IB_BTHE_E);
39 }
40 
41 static void opfn_conn_request(struct rvt_qp *qp)
42 {
43 	struct hfi1_qp_priv *priv = qp->priv;
44 	struct ib_atomic_wr wr;
45 	u16 mask, capcode;
46 	struct hfi1_opfn_type *extd;
47 	u64 data;
48 	unsigned long flags;
49 	int ret = 0;
50 
51 	trace_hfi1_opfn_state_conn_request(qp);
52 	spin_lock_irqsave(&priv->opfn.lock, flags);
53 	/*
54 	 * Exit if the extended bit is not set, or if nothing is requested, or
55 	 * if we have completed all requests, or if a previous request is in
56 	 * progress
57 	 */
58 	if (!priv->opfn.extended || !priv->opfn.requested ||
59 	    priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
60 		goto done;
61 
62 	mask = priv->opfn.requested & ~priv->opfn.completed;
63 	capcode = ilog2(mask & ~(mask - 1)) + 1;
64 	if (capcode >= STL_VERBS_EXTD_MAX) {
65 		priv->opfn.completed |= OPFN_CODE(capcode);
66 		goto done;
67 	}
68 
69 	extd = &hfi1_opfn_handlers[capcode];
70 	if (!extd || !extd->request || !extd->request(qp, &data)) {
71 		/*
72 		 * Either there is no handler for this capability or the request
73 		 * packet could not be generated. Either way, mark it as done so
74 		 * we don't keep attempting to complete it.
75 		 */
76 		priv->opfn.completed |= OPFN_CODE(capcode);
77 		goto done;
78 	}
79 
80 	trace_hfi1_opfn_data_conn_request(qp, capcode, data);
81 	data = (data & ~0xf) | capcode;
82 
83 	memset(&wr, 0, sizeof(wr));
84 	wr.wr.opcode = IB_WR_OPFN;
85 	wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
86 	wr.compare_add = data;
87 
88 	priv->opfn.curr = capcode;	/* A new request is now in progress */
89 	/* Drop opfn.lock before calling ib_post_send() */
90 	spin_unlock_irqrestore(&priv->opfn.lock, flags);
91 
92 	ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
93 	if (ret)
94 		goto err;
95 	trace_hfi1_opfn_state_conn_request(qp);
96 	return;
97 err:
98 	trace_hfi1_msg_opfn_conn_request(qp, "ib_ost_send failed: ret = ",
99 					 (u64)ret);
100 	spin_lock_irqsave(&priv->opfn.lock, flags);
101 	/*
102 	 * In case of an unexpected error return from ib_post_send
103 	 * clear opfn.curr and reschedule to try again
104 	 */
105 	priv->opfn.curr = STL_VERBS_EXTD_NONE;
106 	opfn_schedule_conn_request(qp);
107 done:
108 	spin_unlock_irqrestore(&priv->opfn.lock, flags);
109 }
110 
111 void opfn_send_conn_request(struct work_struct *work)
112 {
113 	struct hfi1_opfn_data *od;
114 	struct hfi1_qp_priv *qpriv;
115 
116 	od = container_of(work, struct hfi1_opfn_data, opfn_work);
117 	qpriv = container_of(od, struct hfi1_qp_priv, opfn);
118 
119 	opfn_conn_request(qpriv->owner);
120 }
121 
122 /*
123  * When QP s_lock is held in the caller, the OPFN request must be scheduled
124  * to a different workqueue to avoid double locking QP s_lock in call to
125  * ib_post_send in opfn_conn_request
126  */
127 static void opfn_schedule_conn_request(struct rvt_qp *qp)
128 {
129 	struct hfi1_qp_priv *priv = qp->priv;
130 
131 	trace_hfi1_opfn_state_sched_conn_request(qp);
132 	queue_work(opfn_wq, &priv->opfn.opfn_work);
133 }
134 
135 void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
136 			struct ib_atomic_eth *ateth)
137 {
138 	struct hfi1_qp_priv *priv = qp->priv;
139 	u64 data = be64_to_cpu(ateth->compare_data);
140 	struct hfi1_opfn_type *extd;
141 	u8 capcode;
142 	unsigned long flags;
143 
144 	trace_hfi1_opfn_state_conn_response(qp);
145 	capcode = data & 0xf;
146 	trace_hfi1_opfn_data_conn_response(qp, capcode, data);
147 	if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
148 		return;
149 
150 	extd = &hfi1_opfn_handlers[capcode];
151 
152 	if (!extd || !extd->response) {
153 		e->atomic_data = capcode;
154 		return;
155 	}
156 
157 	spin_lock_irqsave(&priv->opfn.lock, flags);
158 	if (priv->opfn.completed & OPFN_CODE(capcode)) {
159 		/*
160 		 * We are receiving a request for a feature that has already
161 		 * been negotiated. This may mean that the other side has reset
162 		 */
163 		priv->opfn.completed &= ~OPFN_CODE(capcode);
164 		if (extd->error)
165 			extd->error(qp);
166 	}
167 
168 	if (extd->response(qp, &data))
169 		priv->opfn.completed |= OPFN_CODE(capcode);
170 	e->atomic_data = (data & ~0xf) | capcode;
171 	trace_hfi1_opfn_state_conn_response(qp);
172 	spin_unlock_irqrestore(&priv->opfn.lock, flags);
173 }
174 
175 void opfn_conn_reply(struct rvt_qp *qp, u64 data)
176 {
177 	struct hfi1_qp_priv *priv = qp->priv;
178 	struct hfi1_opfn_type *extd;
179 	u8 capcode;
180 	unsigned long flags;
181 
182 	trace_hfi1_opfn_state_conn_reply(qp);
183 	capcode = data & 0xf;
184 	trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
185 	if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
186 		return;
187 
188 	spin_lock_irqsave(&priv->opfn.lock, flags);
189 	/*
190 	 * Either there is no previous request or the reply is not for the
191 	 * current request
192 	 */
193 	if (!priv->opfn.curr || capcode != priv->opfn.curr)
194 		goto done;
195 
196 	extd = &hfi1_opfn_handlers[capcode];
197 
198 	if (!extd || !extd->reply)
199 		goto clear;
200 
201 	if (extd->reply(qp, data))
202 		priv->opfn.completed |= OPFN_CODE(capcode);
203 clear:
204 	/*
205 	 * Clear opfn.curr to indicate that the previous request is no longer in
206 	 * progress
207 	 */
208 	priv->opfn.curr = STL_VERBS_EXTD_NONE;
209 	trace_hfi1_opfn_state_conn_reply(qp);
210 done:
211 	spin_unlock_irqrestore(&priv->opfn.lock, flags);
212 }
213 
214 void opfn_conn_error(struct rvt_qp *qp)
215 {
216 	struct hfi1_qp_priv *priv = qp->priv;
217 	struct hfi1_opfn_type *extd = NULL;
218 	unsigned long flags;
219 	u16 capcode;
220 
221 	trace_hfi1_opfn_state_conn_error(qp);
222 	trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
223 	/*
224 	 * The QP has gone into the Error state. We have to invalidate all
225 	 * negotiated feature, including the one in progress (if any). The RC
226 	 * QP handling will clean the WQE for the connection request.
227 	 */
228 	spin_lock_irqsave(&priv->opfn.lock, flags);
229 	while (priv->opfn.completed) {
230 		capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
231 		extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
232 		if (extd->error)
233 			extd->error(qp);
234 		priv->opfn.completed &= ~OPFN_CODE(capcode);
235 	}
236 	priv->opfn.extended = 0;
237 	priv->opfn.requested = 0;
238 	priv->opfn.curr = STL_VERBS_EXTD_NONE;
239 	spin_unlock_irqrestore(&priv->opfn.lock, flags);
240 }
241 
242 void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
243 {
244 	struct ib_qp *ibqp = &qp->ibqp;
245 	struct hfi1_qp_priv *priv = qp->priv;
246 	unsigned long flags;
247 
248 	if (attr_mask & IB_QP_RETRY_CNT)
249 		priv->s_retry = attr->retry_cnt;
250 
251 	spin_lock_irqsave(&priv->opfn.lock, flags);
252 	if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
253 		struct tid_rdma_params *local = &priv->tid_rdma.local;
254 
255 		if (attr_mask & IB_QP_TIMEOUT)
256 			priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
257 		if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
258 		    qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
259 			tid_rdma_opfn_init(qp, local);
260 			/*
261 			 * We only want to set the OPFN requested bit when the
262 			 * QP transitions to RTS.
263 			 */
264 			if (attr_mask & IB_QP_STATE &&
265 			    attr->qp_state == IB_QPS_RTS) {
266 				priv->opfn.requested |= OPFN_MASK(TID_RDMA);
267 				/*
268 				 * If the QP is transitioning to RTS and the
269 				 * opfn.completed for TID RDMA has already been
270 				 * set, the QP is being moved *back* into RTS.
271 				 * We can now renegotiate the TID RDMA
272 				 * parameters.
273 				 */
274 				if (priv->opfn.completed &
275 				    OPFN_MASK(TID_RDMA)) {
276 					priv->opfn.completed &=
277 						~OPFN_MASK(TID_RDMA);
278 					/*
279 					 * Since the opfn.completed bit was
280 					 * already set, it is safe to assume
281 					 * that the opfn.extended is also set.
282 					 */
283 					opfn_schedule_conn_request(qp);
284 				}
285 			}
286 		} else {
287 			memset(local, 0, sizeof(*local));
288 		}
289 	}
290 	spin_unlock_irqrestore(&priv->opfn.lock, flags);
291 }
292 
293 void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
294 {
295 	struct hfi1_qp_priv *priv = qp->priv;
296 
297 	if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
298 	    HFI1_CAP_IS_KSET(OPFN)) {
299 		priv->opfn.extended = 1;
300 		if (qp->state == IB_QPS_RTS)
301 			opfn_conn_request(qp);
302 	}
303 }
304 
305 int opfn_init(void)
306 {
307 	opfn_wq = alloc_workqueue("hfi_opfn",
308 				  WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
309 				  WQ_MEM_RECLAIM,
310 				  HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
311 	if (!opfn_wq)
312 		return -ENOMEM;
313 
314 	return 0;
315 }
316 
317 void opfn_exit(void)
318 {
319 	if (opfn_wq) {
320 		destroy_workqueue(opfn_wq);
321 		opfn_wq = NULL;
322 	}
323 }
324