xref: /linux/net/smc/smc_llc.c (revision 40e79150c1686263e6a031d7702aec63aff31332)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Link Layer Control (LLC)
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Klaus Wacker <Klaus.Wacker@de.ibm.com>
10  *              Ursula Braun <ubraun@linux.vnet.ibm.com>
11  */
12 
13 #include <net/tcp.h>
14 #include <rdma/ib_verbs.h>
15 
16 #include "smc.h"
17 #include "smc_core.h"
18 #include "smc_clc.h"
19 #include "smc_llc.h"
20 
21 #define SMC_LLC_DATA_LEN		40
22 
/* common header of every LLC message; wire format per RFC 7609 --
 * field layout and sizes must not be changed
 */
struct smc_llc_hdr {
	struct smc_wr_rx_hdr common;
	u8 length;	/* 44 */
#if defined(__BIG_ENDIAN_BITFIELD)
	u8 reserved:4,
	   add_link_rej_rsn:4;	/* reject reason in ADD LINK responses */
#elif defined(__LITTLE_ENDIAN_BITFIELD)
	u8 add_link_rej_rsn:4,
	   reserved:4;
#endif
	u8 flags;
};
35 
36 #define SMC_LLC_FLAG_NO_RMBE_EYEC	0x03
37 
struct smc_llc_msg_confirm_link {	/* type 0x01 */
	struct smc_llc_hdr hd;
	u8 sender_mac[ETH_ALEN];	/* MAC of the sending RoCE port */
	u8 sender_gid[SMC_GID_SIZE];	/* GID of the sending RoCE port */
	u8 sender_qp_num[3];		/* 24-bit QP number, network order */
	u8 link_num;			/* link_id assigned by the server */
	u8 link_uid[SMC_LGR_ID_SIZE];
	u8 max_links;
	u8 reserved[9];
};
48 
49 #define SMC_LLC_FLAG_ADD_LNK_REJ	0x40
50 #define SMC_LLC_REJ_RSN_NO_ALT_PATH	1
51 
52 #define SMC_LLC_ADD_LNK_MAX_LINKS	2
53 
struct smc_llc_msg_add_link {		/* type 0x02 */
	struct smc_llc_hdr hd;
	u8 sender_mac[ETH_ALEN];
	u8 reserved2[2];
	u8 sender_gid[SMC_GID_SIZE];
	u8 sender_qp_num[3];		/* 24-bit QP number, network order */
	u8 link_num;
	u8 flags2;	/* QP mtu */
	u8 initial_psn[3];		/* 24-bit packet sequence number */
	u8 reserved[8];
};
65 
66 #define SMC_LLC_FLAG_DEL_LINK_ALL	0x40
67 #define SMC_LLC_FLAG_DEL_LINK_ORDERLY	0x20
68 
struct smc_llc_msg_del_link {		/* type 0x04 */
	struct smc_llc_hdr hd;
	u8 link_num;	/* link to delete (ignored with FLAG_DEL_LINK_ALL) */
	__be32 reason;	/* reason code, network byte order */
	u8 reserved[35];
} __packed;			/* format defined in RFC7609 */
75 
struct smc_llc_msg_test_link {		/* type 0x07 */
	struct smc_llc_hdr hd;
	u8 user_data[16];	/* opaque payload, echoed by the peer */
	u8 reserved[24];
};
81 
/* one rkey/vaddr pair of a CONFIRM RKEY message */
struct smc_rmb_rtoken {
	union {
		u8 num_rkeys;	/* first rtoken byte of CONFIRM LINK msg */
				/* is actually the num of rtokens, first */
				/* rtoken is always for the current link */
		u8 link_id;	/* link id of the rtoken */
	};
	__be32 rmb_key;
	__be64 rmb_vaddr;
} __packed;			/* format defined in RFC7609 */
92 
93 #define SMC_LLC_RKEYS_PER_MSG	3
94 
struct smc_llc_msg_confirm_rkey {	/* type 0x06 */
	struct smc_llc_hdr hd;
	/* rtoken[0] is for the sending link; see smc_rmb_rtoken */
	struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
	u8 reserved;
};
100 
101 #define SMC_LLC_DEL_RKEY_MAX	8
102 #define SMC_LLC_FLAG_RKEY_RETRY	0x10
103 #define SMC_LLC_FLAG_RKEY_NEG	0x20
104 
struct smc_llc_msg_delete_rkey {	/* type 0x09 */
	struct smc_llc_hdr hd;
	u8 num_rkeys;	/* number of valid entries in rkey[] */
	u8 err_mask;	/* response: bit set per rkey that failed deletion */
	u8 reserved[2];
	__be32 rkey[8];
	u8 reserved2[4];
};
113 
/* any LLC message fits the fixed 44-byte buffer; raw gives access to
 * the common header plus untyped payload
 */
union smc_llc_msg {
	struct smc_llc_msg_confirm_link confirm_link;
	struct smc_llc_msg_add_link add_link;
	struct smc_llc_msg_del_link delete_link;

	struct smc_llc_msg_confirm_rkey confirm_rkey;
	struct smc_llc_msg_delete_rkey delete_rkey;

	struct smc_llc_msg_test_link test_link;
	struct {
		struct smc_llc_hdr hdr;
		u8 data[SMC_LLC_DATA_LEN];
	} raw;
};
128 
129 #define SMC_LLC_FLAG_RESP		0x80
130 
/* queued copy of one received LLC message */
struct smc_llc_qentry {
	struct list_head list;	/* member of lgr->llc_event_q */
	struct smc_link *link;	/* link the message arrived on */
	union smc_llc_msg msg;	/* copied message contents */
};
136 
137 struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow)
138 {
139 	struct smc_llc_qentry *qentry = flow->qentry;
140 
141 	flow->qentry = NULL;
142 	return qentry;
143 }
144 
145 void smc_llc_flow_qentry_del(struct smc_llc_flow *flow)
146 {
147 	struct smc_llc_qentry *qentry;
148 
149 	if (flow->qentry) {
150 		qentry = flow->qentry;
151 		flow->qentry = NULL;
152 		kfree(qentry);
153 	}
154 }
155 
/* park @qentry on @flow; the flow owner picks it up later via
 * smc_llc_wait() / smc_llc_flow_qentry_clr()
 */
static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
					   struct smc_llc_qentry *qentry)
{
	flow->qentry = qentry;
}
161 
/* try to start a new llc flow, initiated by an incoming llc msg.
 * On success (true) the flow takes ownership of @qentry; on failure
 * (false) @qentry is either parked as lgr->delayed_event or freed.
 */
static bool smc_llc_flow_start(struct smc_llc_flow *flow,
			       struct smc_llc_qentry *qentry)
{
	struct smc_link_group *lgr = qentry->link->lgr;

	spin_lock_bh(&lgr->llc_flow_lock);
	if (flow->type) {
		/* a flow is already active */
		if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK ||
		     qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) &&
		    !lgr->delayed_event) {
			/* park a single link-maintenance request for later */
			lgr->delayed_event = qentry;
		} else {
			/* forget this llc request */
			kfree(qentry);
		}
		spin_unlock_bh(&lgr->llc_flow_lock);
		return false;
	}
	switch (qentry->msg.raw.hdr.common.type) {
	case SMC_LLC_ADD_LINK:
		flow->type = SMC_LLC_FLOW_ADD_LINK;
		break;
	case SMC_LLC_DELETE_LINK:
		flow->type = SMC_LLC_FLOW_DEL_LINK;
		break;
	case SMC_LLC_CONFIRM_RKEY:
	case SMC_LLC_DELETE_RKEY:
		flow->type = SMC_LLC_FLOW_RKEY;
		break;
	default:
		flow->type = SMC_LLC_FLOW_NONE;
	}
	/* the qentry may be the delayed event now being served */
	if (qentry == lgr->delayed_event)
		lgr->delayed_event = NULL;
	spin_unlock_bh(&lgr->llc_flow_lock);
	smc_llc_flow_qentry_set(flow, qentry);
	return true;
}
202 
203 /* start a new local llc flow, wait till current flow finished */
204 int smc_llc_flow_initiate(struct smc_link_group *lgr,
205 			  enum smc_llc_flowtype type)
206 {
207 	enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE;
208 	int rc;
209 
210 	/* all flows except confirm_rkey and delete_rkey are exclusive,
211 	 * confirm/delete rkey flows can run concurrently (local and remote)
212 	 */
213 	if (type == SMC_LLC_FLOW_RKEY)
214 		allowed_remote = SMC_LLC_FLOW_RKEY;
215 again:
216 	if (list_empty(&lgr->list))
217 		return -ENODEV;
218 	spin_lock_bh(&lgr->llc_flow_lock);
219 	if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
220 	    (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
221 	     lgr->llc_flow_rmt.type == allowed_remote)) {
222 		lgr->llc_flow_lcl.type = type;
223 		spin_unlock_bh(&lgr->llc_flow_lock);
224 		return 0;
225 	}
226 	spin_unlock_bh(&lgr->llc_flow_lock);
227 	rc = wait_event_interruptible_timeout(lgr->llc_waiter,
228 			(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
229 			 (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
230 			  lgr->llc_flow_rmt.type == allowed_remote)),
231 			SMC_LLC_WAIT_TIME);
232 	if (!rc)
233 		return -ETIMEDOUT;
234 	goto again;
235 }
236 
/* finish the current llc flow */
void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
{
	spin_lock_bh(&lgr->llc_flow_lock);
	memset(flow, 0, sizeof(*flow));
	flow->type = SMC_LLC_FLOW_NONE;
	spin_unlock_bh(&lgr->llc_flow_lock);
	/* a delayed request parked for the local flow is served next by the
	 * event worker; otherwise wake a waiter in smc_llc_flow_initiate().
	 * NOTE(review): delayed_event is read here without llc_flow_lock --
	 * presumably a benign race with the event worker; confirm.
	 */
	if (!list_empty(&lgr->list) && lgr->delayed_event &&
	    flow == &lgr->llc_flow_lcl)
		schedule_work(&lgr->llc_event_work);
	else
		wake_up_interruptible(&lgr->llc_waiter);
}
250 
/* lnk is optional and used for early wakeup when link goes down, useful in
 * cases where we wait for a response on the link after we sent a request.
 * Returns the received qentry (still owned by the flow) or NULL on
 * timeout, link failure, lgr termination, or unexpected message type.
 */
struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
				    struct smc_link *lnk,
				    int time_out, u8 exp_msg)
{
	struct smc_llc_flow *flow = &lgr->llc_flow_lcl;

	wait_event_interruptible_timeout(lgr->llc_waiter,
					 (flow->qentry ||
					  (lnk && !smc_link_usable(lnk)) ||
					  list_empty(&lgr->list)),
					 time_out);
	if (!flow->qentry ||
	    (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
		/* don't process a received qentry if the wait was aborted */
		smc_llc_flow_qentry_del(flow);
		goto out;
	}
	if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) {
		if (exp_msg == SMC_LLC_ADD_LINK &&
		    flow->qentry->msg.raw.hdr.common.type ==
		    SMC_LLC_DELETE_LINK) {
			/* flow_start will delay the unexpected msg */
			smc_llc_flow_start(&lgr->llc_flow_lcl,
					   smc_llc_flow_qentry_clr(flow));
			return NULL;
		}
		/* drop any other unexpected message type */
		smc_llc_flow_qentry_del(flow);
	}
out:
	return flow->qentry;
}
284 
285 /********************************** send *************************************/
286 
/* LLC keeps no per-send context yet; the empty struct merely satisfies
 * the smc_wr pending-private interface
 */
struct smc_llc_tx_pend {
};
289 
/* handler for send/transmission completion of an LLC msg */
static void smc_llc_tx_handler(struct smc_wr_tx_pend_priv *pend,
			       struct smc_link *link,
			       enum ib_wc_status wc_status)
{
	/* intentionally empty */
	/* future work: handle wc_status error for recovery and failover */
}
297 
298 /**
299  * smc_llc_add_pending_send() - add LLC control message to pending WQE transmits
300  * @link: Pointer to SMC link used for sending LLC control message.
301  * @wr_buf: Out variable returning pointer to work request payload buffer.
302  * @pend: Out variable returning pointer to private pending WR tracking.
303  *	  It's the context the transmit complete handler will get.
304  *
305  * Reserves and pre-fills an entry for a pending work request send/tx.
306  * Used by mid-level smc_llc_send_msg() to prepare for later actual send/tx.
307  * Can sleep due to smc_get_ctrl_buf (if not in softirq context).
308  *
309  * Return: 0 on success, otherwise an error value.
310  */
311 static int smc_llc_add_pending_send(struct smc_link *link,
312 				    struct smc_wr_buf **wr_buf,
313 				    struct smc_wr_tx_pend_priv **pend)
314 {
315 	int rc;
316 
317 	rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
318 				     pend);
319 	if (rc < 0)
320 		return rc;
321 	BUILD_BUG_ON_MSG(
322 		sizeof(union smc_llc_msg) > SMC_WR_BUF_SIZE,
323 		"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_llc_msg)");
324 	BUILD_BUG_ON_MSG(
325 		sizeof(union smc_llc_msg) != SMC_WR_TX_SIZE,
326 		"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_llc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
327 	BUILD_BUG_ON_MSG(
328 		sizeof(struct smc_llc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
329 		"must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_llc_tx_pend)");
330 	return 0;
331 }
332 
333 /* high-level API to send LLC confirm link */
334 int smc_llc_send_confirm_link(struct smc_link *link,
335 			      enum smc_llc_reqresp reqresp)
336 {
337 	struct smc_link_group *lgr = smc_get_lgr(link);
338 	struct smc_llc_msg_confirm_link *confllc;
339 	struct smc_wr_tx_pend_priv *pend;
340 	struct smc_wr_buf *wr_buf;
341 	int rc;
342 
343 	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
344 	if (rc)
345 		return rc;
346 	confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
347 	memset(confllc, 0, sizeof(*confllc));
348 	confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
349 	confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
350 	confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
351 	if (reqresp == SMC_LLC_RESP)
352 		confllc->hd.flags |= SMC_LLC_FLAG_RESP;
353 	memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1],
354 	       ETH_ALEN);
355 	memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
356 	hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
357 	confllc->link_num = link->link_id;
358 	memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
359 	confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
360 	/* send llc message */
361 	rc = smc_wr_tx_send(link, pend);
362 	return rc;
363 }
364 
365 /* send LLC confirm rkey request */
366 static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
367 				     struct smc_buf_desc *rmb_desc)
368 {
369 	struct smc_llc_msg_confirm_rkey *rkeyllc;
370 	struct smc_wr_tx_pend_priv *pend;
371 	struct smc_wr_buf *wr_buf;
372 	struct smc_link *link;
373 	int i, rc, rtok_ix;
374 
375 	rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
376 	if (rc)
377 		return rc;
378 	rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
379 	memset(rkeyllc, 0, sizeof(*rkeyllc));
380 	rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
381 	rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
382 
383 	rtok_ix = 1;
384 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
385 		link = &send_link->lgr->lnk[i];
386 		if (link->state == SMC_LNK_ACTIVE && link != send_link) {
387 			rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
388 			rkeyllc->rtoken[rtok_ix].rmb_key =
389 				htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
390 			rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64(
391 				(u64)sg_dma_address(
392 					rmb_desc->sgt[link->link_idx].sgl));
393 			rtok_ix++;
394 		}
395 	}
396 	/* rkey of send_link is in rtoken[0] */
397 	rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
398 	rkeyllc->rtoken[0].rmb_key =
399 		htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey);
400 	rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
401 		(u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
402 	/* send llc message */
403 	rc = smc_wr_tx_send(send_link, pend);
404 	return rc;
405 }
406 
407 /* send LLC delete rkey request */
408 static int smc_llc_send_delete_rkey(struct smc_link *link,
409 				    struct smc_buf_desc *rmb_desc)
410 {
411 	struct smc_llc_msg_delete_rkey *rkeyllc;
412 	struct smc_wr_tx_pend_priv *pend;
413 	struct smc_wr_buf *wr_buf;
414 	int rc;
415 
416 	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
417 	if (rc)
418 		return rc;
419 	rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
420 	memset(rkeyllc, 0, sizeof(*rkeyllc));
421 	rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
422 	rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
423 	rkeyllc->num_rkeys = 1;
424 	rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
425 	/* send llc message */
426 	rc = smc_wr_tx_send(link, pend);
427 	return rc;
428 }
429 
430 /* prepare an add link message */
431 static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
432 				  struct smc_link *link, u8 mac[], u8 gid[],
433 				  enum smc_llc_reqresp reqresp)
434 {
435 	memset(addllc, 0, sizeof(*addllc));
436 	addllc->hd.common.type = SMC_LLC_ADD_LINK;
437 	addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
438 	if (reqresp == SMC_LLC_RESP) {
439 		addllc->hd.flags |= SMC_LLC_FLAG_RESP;
440 		/* always reject more links for now */
441 		addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
442 		addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
443 	}
444 	memcpy(addllc->sender_mac, mac, ETH_ALEN);
445 	memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
446 }
447 
448 /* send ADD LINK request or response */
449 int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
450 			  enum smc_llc_reqresp reqresp)
451 {
452 	struct smc_llc_msg_add_link *addllc;
453 	struct smc_wr_tx_pend_priv *pend;
454 	struct smc_wr_buf *wr_buf;
455 	int rc;
456 
457 	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
458 	if (rc)
459 		return rc;
460 	addllc = (struct smc_llc_msg_add_link *)wr_buf;
461 	smc_llc_prep_add_link(addllc, link, mac, gid, reqresp);
462 	/* send llc message */
463 	rc = smc_wr_tx_send(link, pend);
464 	return rc;
465 }
466 
467 /* prepare a delete link message */
468 static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
469 				     struct smc_link *link,
470 				     enum smc_llc_reqresp reqresp, bool orderly)
471 {
472 	memset(delllc, 0, sizeof(*delllc));
473 	delllc->hd.common.type = SMC_LLC_DELETE_LINK;
474 	delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
475 	if (reqresp == SMC_LLC_RESP)
476 		delllc->hd.flags |= SMC_LLC_FLAG_RESP;
477 	/* DEL_LINK_ALL because only 1 link supported */
478 	delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
479 	if (orderly)
480 		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
481 	delllc->link_num = link->link_id;
482 }
483 
484 /* send DELETE LINK request or response */
485 int smc_llc_send_delete_link(struct smc_link *link,
486 			     enum smc_llc_reqresp reqresp, bool orderly)
487 {
488 	struct smc_llc_msg_del_link *delllc;
489 	struct smc_wr_tx_pend_priv *pend;
490 	struct smc_wr_buf *wr_buf;
491 	int rc;
492 
493 	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
494 	if (rc)
495 		return rc;
496 	delllc = (struct smc_llc_msg_del_link *)wr_buf;
497 	smc_llc_prep_delete_link(delllc, link, reqresp, orderly);
498 	/* send llc message */
499 	rc = smc_wr_tx_send(link, pend);
500 	return rc;
501 }
502 
503 /* send LLC test link request */
504 static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
505 {
506 	struct smc_llc_msg_test_link *testllc;
507 	struct smc_wr_tx_pend_priv *pend;
508 	struct smc_wr_buf *wr_buf;
509 	int rc;
510 
511 	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
512 	if (rc)
513 		return rc;
514 	testllc = (struct smc_llc_msg_test_link *)wr_buf;
515 	memset(testllc, 0, sizeof(*testllc));
516 	testllc->hd.common.type = SMC_LLC_TEST_LINK;
517 	testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
518 	memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
519 	/* send llc message */
520 	rc = smc_wr_tx_send(link, pend);
521 	return rc;
522 }
523 
524 /* schedule an llc send on link, may wait for buffers */
525 static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
526 {
527 	struct smc_wr_tx_pend_priv *pend;
528 	struct smc_wr_buf *wr_buf;
529 	int rc;
530 
531 	if (!smc_link_usable(link))
532 		return -ENOLINK;
533 	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
534 	if (rc)
535 		return rc;
536 	memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
537 	return smc_wr_tx_send(link, pend);
538 }
539 
540 /********************************* receive ***********************************/
541 
542 static void smc_llc_rx_delete_link(struct smc_link *link,
543 				   struct smc_llc_msg_del_link *llc)
544 {
545 	struct smc_link_group *lgr = smc_get_lgr(link);
546 
547 	smc_lgr_forget(lgr);
548 	if (lgr->role == SMC_SERV) {
549 		/* client asks to delete this link, send request */
550 		smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
551 	} else {
552 		/* server requests to delete this link, send response */
553 		smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
554 	}
555 	smc_llc_send_message(link, llc);
556 	smc_lgr_terminate_sched(lgr);
557 }
558 
/* process a confirm_rkey request from peer, remote flow */
static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
{
	struct smc_llc_msg_confirm_rkey *llc;
	struct smc_llc_qentry *qentry;
	struct smc_link *link;
	int num_entries;
	int rk_idx;
	int i;

	qentry = lgr->llc_flow_rmt.qentry;
	llc = &qentry->msg.confirm_rkey;
	link = qentry->link;

	/* first byte of rtoken[0] is the count of additional rkeys */
	num_entries = llc->rtoken[0].num_rkeys;
	/* first rkey entry is for receiving link */
	rk_idx = smc_rtoken_add(link,
				llc->rtoken[0].rmb_vaddr,
				llc->rtoken[0].rmb_key);
	if (rk_idx < 0)
		goto out_err;

	/* register the rkeys of the other links under the same rtoken idx */
	for (i = 1; i <= min_t(u8, num_entries, SMC_LLC_RKEYS_PER_MSG - 1); i++)
		smc_rtoken_set2(lgr, rk_idx, llc->rtoken[i].link_id,
				llc->rtoken[i].rmb_vaddr,
				llc->rtoken[i].rmb_key);
	/* max links is 3 so there is no need to support conf_rkey_cont msgs */
	goto out;
out_err:
	/* signal failure; peer may retry */
	llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
	llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
out:
	/* answer by converting the request buffer into a response */
	llc->hd.flags |= SMC_LLC_FLAG_RESP;
	smc_llc_send_message(link, &qentry->msg);
	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}
595 
596 /* process a delete_rkey request from peer, remote flow */
597 static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
598 {
599 	struct smc_llc_msg_delete_rkey *llc;
600 	struct smc_llc_qentry *qentry;
601 	struct smc_link *link;
602 	u8 err_mask = 0;
603 	int i, max;
604 
605 	qentry = lgr->llc_flow_rmt.qentry;
606 	llc = &qentry->msg.delete_rkey;
607 	link = qentry->link;
608 
609 	max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
610 	for (i = 0; i < max; i++) {
611 		if (smc_rtoken_delete(link, llc->rkey[i]))
612 			err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
613 	}
614 	if (err_mask) {
615 		llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
616 		llc->err_mask = err_mask;
617 	}
618 	llc->hd.flags |= SMC_LLC_FLAG_RESP;
619 	smc_llc_send_message(link, &qentry->msg);
620 	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
621 }
622 
623 /* flush the llc event queue */
624 static void smc_llc_event_flush(struct smc_link_group *lgr)
625 {
626 	struct smc_llc_qentry *qentry, *q;
627 
628 	spin_lock_bh(&lgr->llc_event_q_lock);
629 	list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
630 		list_del_init(&qentry->list);
631 		kfree(qentry);
632 	}
633 	spin_unlock_bh(&lgr->llc_event_q_lock);
634 }
635 
/* dispatch one queued LLC request; takes ownership of @qentry.
 * Paths that hand the qentry to a flow return early; all others
 * fall through to kfree().
 */
static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
{
	union smc_llc_msg *llc = &qentry->msg;
	struct smc_link *link = qentry->link;
	struct smc_link_group *lgr = link->lgr;

	if (!smc_link_usable(link))
		goto out;

	switch (llc->raw.hdr.common.type) {
	case SMC_LLC_TEST_LINK:
		/* echo the message back as a response */
		llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
		smc_llc_send_message(link, llc);
		break;
	case SMC_LLC_ADD_LINK:
		if (list_empty(&lgr->list))
			goto out;	/* lgr is terminating */
		if (lgr->role == SMC_CLNT) {
			if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) {
				/* a flow is waiting for this message */
				smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
							qentry);
				wake_up_interruptible(&lgr->llc_waiter);
			} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
						      qentry)) {
				/* tbd: schedule_work(&lgr->llc_add_link_work); */
			}
		} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
			/* as smc server, handle client suggestion */
			/* tbd: schedule_work(&lgr->llc_add_link_work); */
		}
		/* qentry now owned by the flow (or already freed/parked) */
		return;
	case SMC_LLC_CONFIRM_LINK:
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* a flow is waiting for this message */
			smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
			wake_up_interruptible(&lgr->llc_waiter);
			return;
		}
		break;
	case SMC_LLC_DELETE_LINK:
		smc_llc_rx_delete_link(link, &llc->delete_link);
		break;
	case SMC_LLC_CONFIRM_RKEY:
		/* new request from remote, assign to remote flow */
		if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
			/* process here, does not wait for more llc msgs */
			smc_llc_rmt_conf_rkey(lgr);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
		}
		return;
	case SMC_LLC_CONFIRM_RKEY_CONT:
		/* not used because max links is 3, and 3 rkeys fit into
		 * one CONFIRM_RKEY message
		 */
		break;
	case SMC_LLC_DELETE_RKEY:
		/* new request from remote, assign to remote flow */
		if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
			/* process here, does not wait for more llc msgs */
			smc_llc_rmt_delete_rkey(lgr);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
		}
		return;
	}
out:
	kfree(qentry);
}
704 
705 /* worker to process llc messages on the event queue */
706 static void smc_llc_event_work(struct work_struct *work)
707 {
708 	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
709 						  llc_event_work);
710 	struct smc_llc_qentry *qentry;
711 
712 	if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
713 		if (smc_link_usable(lgr->delayed_event->link)) {
714 			smc_llc_event_handler(lgr->delayed_event);
715 		} else {
716 			qentry = lgr->delayed_event;
717 			lgr->delayed_event = NULL;
718 			kfree(qentry);
719 		}
720 	}
721 
722 again:
723 	spin_lock_bh(&lgr->llc_event_q_lock);
724 	if (!list_empty(&lgr->llc_event_q)) {
725 		qentry = list_first_entry(&lgr->llc_event_q,
726 					  struct smc_llc_qentry, list);
727 		list_del_init(&qentry->list);
728 		spin_unlock_bh(&lgr->llc_event_q_lock);
729 		smc_llc_event_handler(qentry);
730 		goto again;
731 	}
732 	spin_unlock_bh(&lgr->llc_event_q_lock);
733 }
734 
735 /* process llc responses in tasklet context */
736 static void smc_llc_rx_response(struct smc_link *link,
737 				struct smc_llc_qentry *qentry)
738 {
739 	u8 llc_type = qentry->msg.raw.hdr.common.type;
740 
741 	switch (llc_type) {
742 	case SMC_LLC_TEST_LINK:
743 		if (link->state == SMC_LNK_ACTIVE)
744 			complete(&link->llc_testlink_resp);
745 		break;
746 	case SMC_LLC_ADD_LINK:
747 	case SMC_LLC_CONFIRM_LINK:
748 	case SMC_LLC_CONFIRM_RKEY:
749 	case SMC_LLC_DELETE_RKEY:
750 		/* assign responses to the local flow, we requested them */
751 		smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
752 		wake_up_interruptible(&link->lgr->llc_waiter);
753 		return;
754 	case SMC_LLC_DELETE_LINK:
755 		if (link->lgr->role == SMC_SERV)
756 			smc_lgr_schedule_free_work_fast(link->lgr);
757 		break;
758 	case SMC_LLC_CONFIRM_RKEY_CONT:
759 		/* not used because max links is 3 */
760 		break;
761 	}
762 	kfree(qentry);
763 }
764 
765 static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
766 {
767 	struct smc_link_group *lgr = link->lgr;
768 	struct smc_llc_qentry *qentry;
769 	unsigned long flags;
770 
771 	qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
772 	if (!qentry)
773 		return;
774 	qentry->link = link;
775 	INIT_LIST_HEAD(&qentry->list);
776 	memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
777 
778 	/* process responses immediately */
779 	if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
780 		smc_llc_rx_response(link, qentry);
781 		return;
782 	}
783 
784 	/* add requests to event queue */
785 	spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
786 	list_add_tail(&qentry->list, &lgr->llc_event_q);
787 	spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
788 	schedule_work(&link->lgr->llc_event_work);
789 }
790 
791 /* copy received msg and add it to the event queue */
792 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
793 {
794 	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
795 	union smc_llc_msg *llc = buf;
796 
797 	if (wc->byte_len < sizeof(*llc))
798 		return; /* short message */
799 	if (llc->raw.hdr.length != sizeof(*llc))
800 		return; /* invalid message */
801 
802 	smc_llc_enqueue(link, llc);
803 }
804 
805 /***************************** worker, utils *********************************/
806 
/* periodic keepalive worker: send a TEST LINK probe when the link has
 * been idle for llc_testlink_time, terminate the lgr on missing response
 */
static void smc_llc_testlink_work(struct work_struct *work)
{
	struct smc_link *link = container_of(to_delayed_work(work),
					     struct smc_link, llc_testlink_wrk);
	unsigned long next_interval;
	unsigned long expire_time;
	u8 user_data[16] = { 0 };
	int rc;

	if (link->state != SMC_LNK_ACTIVE)
		return;		/* don't reschedule worker */
	/* recent rx traffic counts as proof of life: just re-arm the timer */
	expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
	if (time_is_after_jiffies(expire_time)) {
		next_interval = expire_time - jiffies;
		goto out;
	}
	reinit_completion(&link->llc_testlink_resp);
	smc_llc_send_test_link(link, user_data);
	/* receive TEST LINK response over RoCE fabric */
	rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
						       SMC_LLC_WAIT_TIME);
	if (link->state != SMC_LNK_ACTIVE)
		return;		/* link state changed */
	if (rc <= 0) {
		/* no response in time: give up on the link group */
		smc_lgr_terminate_sched(smc_get_lgr(link));
		return;
	}
	next_interval = link->llc_testlink_time;
out:
	schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
}
838 
/* initialize the LLC state of a new link group */
void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);

	INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
	INIT_LIST_HEAD(&lgr->llc_event_q);
	spin_lock_init(&lgr->llc_event_q_lock);
	spin_lock_init(&lgr->llc_flow_lock);
	init_waitqueue_head(&lgr->llc_waiter);
	/* probe interval follows the netns TCP keepalive setting */
	lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
}
850 
851 /* called after lgr was removed from lgr_list */
852 void smc_llc_lgr_clear(struct smc_link_group *lgr)
853 {
854 	smc_llc_event_flush(lgr);
855 	wake_up_interruptible_all(&lgr->llc_waiter);
856 	cancel_work_sync(&lgr->llc_event_work);
857 	if (lgr->delayed_event) {
858 		kfree(lgr->delayed_event);
859 		lgr->delayed_event = NULL;
860 	}
861 }
862 
863 int smc_llc_link_init(struct smc_link *link)
864 {
865 	init_completion(&link->llc_testlink_resp);
866 	INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
867 	return 0;
868 }
869 
870 void smc_llc_link_active(struct smc_link *link)
871 {
872 	link->state = SMC_LNK_ACTIVE;
873 	if (link->lgr->llc_testlink_time) {
874 		link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
875 		schedule_delayed_work(&link->llc_testlink_wrk,
876 				      link->llc_testlink_time);
877 	}
878 }
879 
/* called in worker context */
void smc_llc_link_clear(struct smc_link *link)
{
	complete(&link->llc_testlink_resp);	/* unblock testlink worker */
	cancel_delayed_work_sync(&link->llc_testlink_wrk);
	/* release any waiters blocked on wr resources of this link */
	smc_wr_wakeup_reg_wait(link);
	smc_wr_wakeup_tx_wait(link);
}
888 
889 /* register a new rtoken at the remote peer (for all links) */
890 int smc_llc_do_confirm_rkey(struct smc_link *send_link,
891 			    struct smc_buf_desc *rmb_desc)
892 {
893 	struct smc_link_group *lgr = send_link->lgr;
894 	struct smc_llc_qentry *qentry = NULL;
895 	int rc = 0;
896 
897 	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
898 	if (rc)
899 		return rc;
900 	rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
901 	if (rc)
902 		goto out;
903 	/* receive CONFIRM RKEY response from server over RoCE fabric */
904 	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
905 			      SMC_LLC_CONFIRM_RKEY);
906 	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
907 		rc = -EFAULT;
908 out:
909 	if (qentry)
910 		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
911 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
912 	return rc;
913 }
914 
915 /* unregister an rtoken at the remote peer */
916 int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
917 			   struct smc_buf_desc *rmb_desc)
918 {
919 	struct smc_llc_qentry *qentry = NULL;
920 	struct smc_link *send_link;
921 	int rc = 0;
922 
923 	send_link = smc_llc_usable_link(lgr);
924 	if (!send_link)
925 		return -ENOLINK;
926 
927 	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
928 	if (rc)
929 		return rc;
930 	/* protected by llc_flow control */
931 	rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
932 	if (rc)
933 		goto out;
934 	/* receive DELETE RKEY response from server over RoCE fabric */
935 	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
936 			      SMC_LLC_DELETE_RKEY);
937 	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
938 		rc = -EFAULT;
939 out:
940 	if (qentry)
941 		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
942 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
943 	return rc;
944 }
945 
946 /* evaluate confirm link request or response */
947 int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
948 			   enum smc_llc_reqresp type)
949 {
950 	if (type == SMC_LLC_REQ)	/* SMC server assigns link_id */
951 		qentry->link->link_id = qentry->msg.confirm_link.link_num;
952 	if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
953 		return -ENOTSUPP;
954 	return 0;
955 }
956 
957 /***************************** init, exit, misc ******************************/
958 
959 static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
960 	{
961 		.handler	= smc_llc_rx_handler,
962 		.type		= SMC_LLC_CONFIRM_LINK
963 	},
964 	{
965 		.handler	= smc_llc_rx_handler,
966 		.type		= SMC_LLC_TEST_LINK
967 	},
968 	{
969 		.handler	= smc_llc_rx_handler,
970 		.type		= SMC_LLC_ADD_LINK
971 	},
972 	{
973 		.handler	= smc_llc_rx_handler,
974 		.type		= SMC_LLC_DELETE_LINK
975 	},
976 	{
977 		.handler	= smc_llc_rx_handler,
978 		.type		= SMC_LLC_CONFIRM_RKEY
979 	},
980 	{
981 		.handler	= smc_llc_rx_handler,
982 		.type		= SMC_LLC_CONFIRM_RKEY_CONT
983 	},
984 	{
985 		.handler	= smc_llc_rx_handler,
986 		.type		= SMC_LLC_DELETE_RKEY
987 	},
988 	{
989 		.handler	= NULL,
990 	}
991 };
992 
993 int __init smc_llc_init(void)
994 {
995 	struct smc_wr_rx_handler *handler;
996 	int rc = 0;
997 
998 	for (handler = smc_llc_rx_handlers; handler->handler; handler++) {
999 		INIT_HLIST_NODE(&handler->list);
1000 		rc = smc_wr_rx_register_handler(handler);
1001 		if (rc)
1002 			break;
1003 	}
1004 	return rc;
1005 }
1006