xref: /linux/fs/smb/smbdirect/connect.c (revision 0fc8f6200d2313278fbf4539bbab74677c685531)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *   Copyright (c) 2012,2016,2017,2025 Stefan Metzmacher
4  */
5 
6 #include "internal.h"
7 #include "../common/smb2status.h"
8 
9 static int smbdirect_connect_setup_connection(struct smbdirect_socket *sc);
10 static int smbdirect_connect_resolve_addr(struct smbdirect_socket *sc,
11 					  const struct sockaddr *src,
12 					  const struct sockaddr *dst);
13 static int smbdirect_connect_rdma_event_handler(struct rdma_cm_id *id,
14 						struct rdma_cm_event *event);
15 static int smbdirect_connect_negotiate_start(struct smbdirect_socket *sc);
16 static void smbdirect_connect_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc);
17 static void smbdirect_connect_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc);
18 
19 int smbdirect_connect(struct smbdirect_socket *sc, const struct sockaddr *dst)
20 {
21 	const struct sockaddr *src = NULL;
22 	union {
23 		struct sockaddr sa;
24 		struct sockaddr_storage ss;
25 	} src_addr = {
26 		.sa = {
27 			.sa_family = AF_UNSPEC,
28 		},
29 	};
30 	int ret;
31 
32 	if (sc->first_error)
33 		return -ENOTCONN;
34 
35 	if (sc->status != SMBDIRECT_SOCKET_CREATED)
36 		return -EALREADY;
37 
38 	if (WARN_ON_ONCE(!sc->rdma.cm_id))
39 		return -EINVAL;
40 
41 	src_addr.ss = sc->rdma.cm_id->route.addr.src_addr;
42 	if (src_addr.sa.sa_family != AF_UNSPEC)
43 		src = &src_addr.sa;
44 
45 	smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO,
46 		"connect: src: %pISpsfc dst: %pISpsfc\n",
47 		src, dst);
48 
49 	ret = smbdirect_connect_setup_connection(sc);
50 	if (ret)
51 		return ret;
52 
53 	ret = smbdirect_connect_resolve_addr(sc, src, dst);
54 	if (ret)
55 		return ret;
56 
57 	/*
58 	 * The rest happens async via smbdirect_connect_rdma_event_handler()
59 	 * the caller will decide to wait or not.
60 	 */
61 	return 0;
62 }
63 __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect);
64 
65 static int smbdirect_connect_setup_connection(struct smbdirect_socket *sc)
66 {
67 	rdma_lock_handler(sc->rdma.cm_id);
68 	sc->rdma.cm_id->event_handler = smbdirect_connect_rdma_event_handler;
69 	rdma_unlock_handler(sc->rdma.cm_id);
70 
71 	if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_CREATED))
72 		return -EINVAL;
73 	sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED;
74 
75 	return 0;
76 }
77 
78 static int smbdirect_connect_resolve_addr(struct smbdirect_socket *sc,
79 					  const struct sockaddr *src,
80 					  const struct sockaddr *dst)
81 {
82 	const struct smbdirect_socket_parameters *sp = &sc->parameters;
83 	struct sockaddr *src_addr = NULL;
84 	struct sockaddr *dst_addr = NULL;
85 	int ret;
86 
87 	src_addr = (struct sockaddr *)src;
88 	if (src_addr && src_addr->sa_family == AF_UNSPEC)
89 		src_addr = NULL;
90 	dst_addr = (struct sockaddr *)dst;
91 
92 	if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED))
93 		return -EINVAL;
94 	sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING;
95 	sc->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
96 	ret = rdma_resolve_addr(sc->rdma.cm_id, src_addr, dst_addr,
97 				sp->resolve_addr_timeout_msec);
98 	if (ret) {
99 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
100 			"rdma_resolve_addr() failed %1pe\n",
101 			SMBDIRECT_DEBUG_ERR_PTR(ret));
102 		return ret;
103 	}
104 
105 	return 0;
106 }
107 
108 static int smbdirect_connect_resolve_route(struct smbdirect_socket *sc)
109 {
110 	const struct smbdirect_socket_parameters *sp = &sc->parameters;
111 	int ret;
112 
113 	if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED))
114 		return sc->first_error;
115 	sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING;
116 	sc->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
117 	ret = rdma_resolve_route(sc->rdma.cm_id, sp->resolve_route_timeout_msec);
118 	if (ret) {
119 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
120 			"rdma_resolve_route() failed %1pe\n",
121 			SMBDIRECT_DEBUG_ERR_PTR(ret));
122 		return ret;
123 	}
124 
125 	return 0;
126 }
127 
/*
 * Create the QP and issue the RDMA connect once the route is resolved.
 *
 * Called from smbdirect_connect_rdma_event_handler() while the rdma_cm
 * handler mutex is held, hence rdma_connect_locked() below.
 *
 * Verifies the device supports FRWR and matches any port-range
 * transport restriction (IB/RoCE vs. iWarp), clamps the negotiated
 * parameters to the device limits, creates the QP and connects.
 * On success the socket is RDMA_CONNECT_RUNNING and the idle timer is
 * armed with the rdma connect timeout so an unresponsive peer leads to
 * a disconnect.
 *
 * Returns 0 on success, a negative errno otherwise.
 */
static int smbdirect_connect_rdma_connect(struct smbdirect_socket *sc)
{
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct rdma_conn_param conn_param;
	__be32 ird_ord_hdr[2];
	int ret;

	sc->ib.dev = sc->rdma.cm_id->device;

	/* we require Fast Registration Work Requests for MR handling */
	if (!smbdirect_frwr_is_supported(&sc->ib.dev->attrs)) {
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"Fast Registration Work Requests (FRWR) is not supported device %.*s\n",
			IB_DEVICE_NAME_MAX,
			sc->ib.dev->name);
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"Device capability flags = %llx max_fast_reg_page_list_len = %u\n",
			sc->ib.dev->attrs.device_cap_flags,
			sc->ib.dev->attrs.max_fast_reg_page_list_len);
		return -EPROTONOSUPPORT;
	}

	/* ONLY_IB accepts IB or RoCE, ONLY_IW requires iWarp */
	if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB &&
	    !rdma_ib_or_roce(sc->ib.dev, sc->rdma.cm_id->port_num)) {
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"Not IB: device: %.*s IW:%u local: %pISpsfc remote: %pISpsfc\n",
			IB_DEVICE_NAME_MAX,
			sc->ib.dev->name,
			rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num),
			&sc->rdma.cm_id->route.addr.src_addr,
			&sc->rdma.cm_id->route.addr.dst_addr);
		return -EPROTONOSUPPORT;
	}
	if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW &&
	    !rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num)) {
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"Not IW: device: %.*s IB:%u local: %pISpsfc remote: %pISpsfc\n",
			IB_DEVICE_NAME_MAX,
			sc->ib.dev->name,
			rdma_ib_or_roce(sc->ib.dev, sc->rdma.cm_id->port_num),
			&sc->rdma.cm_id->route.addr.src_addr,
			&sc->rdma.cm_id->route.addr.dst_addr);
		return -EPROTONOSUPPORT;
	}

	smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO,
		"rdma connect: device: %.*s local: %pISpsfc remote: %pISpsfc\n",
		IB_DEVICE_NAME_MAX,
		sc->ib.dev->name,
		&sc->rdma.cm_id->route.addr.src_addr,
		&sc->rdma.cm_id->route.addr.dst_addr);

	/* clamp our FRMR depth to what the device can register */
	sp->max_frmr_depth = min_t(u32, sp->max_frmr_depth,
				   sc->ib.dev->attrs.max_fast_reg_page_list_len);
	/* prefer SG_GAPS MRs when the device supports gapped SG lists */
	sc->mr_io.type = IB_MR_TYPE_MEM_REG;
	if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
		sc->mr_io.type = IB_MR_TYPE_SG_GAPS;

	/* cannot offer more incoming RDMA reads than the device allows */
	sp->responder_resources = min_t(u8, sp->responder_resources,
					sc->ib.dev->attrs.max_qp_rd_atom);
	smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_INFO,
		"responder_resources=%d\n",
		sp->responder_resources);

	ret = smbdirect_connection_create_qp(sc);
	if (ret) {
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"smbdirect_connection_create_qp() failed %1pe\n",
			SMBDIRECT_DEBUG_ERR_PTR(ret));
		return ret;
	}

	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.initiator_depth = sp->initiator_depth;
	conn_param.responder_resources = sp->responder_resources;

	/* Need to send IRD/ORD in private data for iWARP */
	if (rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num)) {
		ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources);
		ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth);
		conn_param.private_data = ird_ord_hdr;
		conn_param.private_data_len = sizeof(ird_ord_hdr);
	} else {
		conn_param.private_data = NULL;
		conn_param.private_data_len = 0;
	}

	conn_param.retry_count = SMBDIRECT_RDMA_CM_RETRY;
	conn_param.rnr_retry_count = SMBDIRECT_RDMA_CM_RNR_RETRY;
	conn_param.flow_control = 0;

	if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED))
		return sc->first_error;
	sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING;
	sc->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
	/* _locked variant: the cm handler mutex is already held */
	ret = rdma_connect_locked(sc->rdma.cm_id, &conn_param);
	if (ret) {
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"rdma_connect_locked() failed %1pe\n",
			SMBDIRECT_DEBUG_ERR_PTR(ret));
		return ret;
	}

	/*
	 * start with the rdma connect timeout and SMBDIRECT_KEEPALIVE_PENDING
	 * so that the timer will cause a disconnect.
	 */
	INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work);
	sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING;
	mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work,
			 msecs_to_jiffies(sp->rdma_connect_timeout_msec));

	return 0;
}
241 
/*
 * rdma_cm event handler driving the connect state machine:
 *
 *   ADDR_RESOLVED  -> resolve the route
 *   ROUTE_RESOLVED -> create the QP and rdma_connect
 *   ESTABLISHED    -> start the SMBDirect negotiation
 *
 * Any unexpected event, a non-zero event status, or a failure in one
 * of the steps schedules a disconnect cleanup.  Always returns 0 to
 * the rdma_cm layer; errors are propagated via the cleanup path.
 */
static int smbdirect_connect_rdma_event_handler(struct rdma_cm_id *id,
						struct rdma_cm_event *event)
{
	struct smbdirect_socket *sc = id->context;
	u8 peer_initiator_depth;
	u8 peer_responder_resources;
	int ret;

	/*
	 * cma_cm_event_handler() has
	 * lockdep_assert_held(&id_priv->handler_mutex);
	 *
	 * Mutexes are not allowed in interrupts,
	 * and we rely on not being in an interrupt here,
	 * as we might sleep.
	 *
	 * We didn't timeout so we cancel our idle timer,
	 * it will be scheduled again if needed.
	 */
	WARN_ON_ONCE(in_interrupt());
	sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE;
	cancel_delayed_work_sync(&sc->idle.timer_work);

	if (event->status || event->event != sc->rdma.expected_event) {
		int lvl = SMBDIRECT_LOG_ERR;

		/* map well-known CM events to a matching errno */
		ret = -ECONNABORTED;

		if (event->event == RDMA_CM_EVENT_REJECTED)
			ret = -ECONNREFUSED;
		if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
			ret = -ENETDOWN;
		/* if event->status itself looks like a negative errno, prefer it */
		if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status)))
			ret = event->status;

		/* device removal is not worth an error-level log */
		if (ret == -ENODEV)
			lvl = SMBDIRECT_LOG_INFO;

		smbdirect_log_rdma_event(sc, lvl,
			"%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n",
			smbdirect_socket_status_string(sc->status),
			SMBDIRECT_DEBUG_ERR_PTR(sc->first_error),
			rdma_event_msg(sc->rdma.expected_event),
			rdma_event_msg(event->event),
			event->status,
			SMBDIRECT_DEBUG_ERR_PTR(ret));

		smbdirect_socket_schedule_cleanup_lvl(sc,
						      lvl,
						      ret);
		return 0;
	}

	smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO,
		"%s (first_error=%1pe) event=%s\n",
		smbdirect_socket_status_string(sc->status),
		SMBDIRECT_DEBUG_ERR_PTR(sc->first_error),
		rdma_event_msg(event->event));

	/* a failed socket ignores further events; cleanup is in flight */
	if (sc->first_error)
		return 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING))
			return 0;
		sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED;

		ret = smbdirect_connect_resolve_route(sc);
		if (ret)
			smbdirect_socket_schedule_cleanup(sc, ret);
		return 0;

	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING))
			return 0;
		sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED;

		ret = smbdirect_connect_rdma_connect(sc);
		if (ret)
			smbdirect_socket_schedule_cleanup(sc, ret);
		return 0;

	case RDMA_CM_EVENT_ESTABLISHED:
		smbdirect_connection_rdma_established(sc);

		if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING))
			return 0;
		sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED;

		/*
		 * Here we work around an inconsistency between
		 * iWarp and other devices (at least rxe and irdma using RoCEv2)
		 */
		if (rdma_protocol_iwarp(id->device, id->port_num)) {
			/*
			 * iWarp devices report the peer's values
			 * with the perspective of the peer here.
			 * Tested with siw and irdma (in iwarp mode)
			 * We need to change to our perspective here,
			 * so we need to switch the values.
			 */
			peer_initiator_depth = event->param.conn.responder_resources;
			peer_responder_resources = event->param.conn.initiator_depth;
		} else {
			/*
			 * Non iWarp devices report the peer's values
			 * already changed to our perspective here.
			 * Tested with rxe and irdma (in roce mode).
			 */
			peer_initiator_depth = event->param.conn.initiator_depth;
			peer_responder_resources = event->param.conn.responder_resources;
		}
		smbdirect_connection_negotiate_rdma_resources(sc,
							      peer_initiator_depth,
							      peer_responder_resources,
							      &event->param.conn);

		ret = smbdirect_connect_negotiate_start(sc);
		if (ret)
			smbdirect_socket_schedule_cleanup(sc, ret);
		return 0;

	default:
		break;
	}

	/*
	 * This is an internal error
	 */
	WARN_ON_ONCE(sc->rdma.expected_event != RDMA_CM_EVENT_ESTABLISHED);
	smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
	return 0;
}
376 
/*
 * Send the SMBDirect negotiate request and post a receive buffer for
 * the peer's negotiate response.
 *
 * Runs from the rdma_cm event handler after RDMA_CM_EVENT_ESTABLISHED.
 * The response completion is handled by
 * smbdirect_connect_negotiate_recv_done(), which defers the heavy
 * lifting to smbdirect_connect_negotiate_recv_work().  The idle timer
 * is armed with the negotiate timeout so a silent peer causes a
 * disconnect.
 *
 * Note the error unwinding is intentionally split in two chains: once
 * the recv_io has been posted it is owned by the rdma layer and must
 * not be put here (see the comments at the labels below).
 */
static int smbdirect_connect_negotiate_start(struct smbdirect_socket *sc)
{
	const struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_recv_io *recv_io = NULL;
	struct smbdirect_send_io *send_io = NULL;
	struct smbdirect_negotiate_req *nreq = NULL;
	int ret;

	if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED))
		return sc->first_error;
	sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING;

	ret = smbdirect_connection_create_mem_pools(sc);
	if (ret) {
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"smbdirect_connection_create_mem_pools() failed %1pe\n",
			SMBDIRECT_DEBUG_ERR_PTR(ret));
		goto create_mem_pools_failed;
	}

	/*
	 * There is only a single batch credit
	 */
	atomic_set(&sc->send_io.bcredits.count, 1);

	/*
	 * Initialize the local credits to post
	 * IB_WR_SEND[_WITH_INV].
	 */
	atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target);

	recv_io = smbdirect_connection_get_recv_io(sc);
	if (WARN_ON_ONCE(!recv_io)) {
		/* the pools were just created, so this can't legitimately fail */
		ret = -EINVAL;
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"smbdirect_connection_get_recv_io() failed %1pe\n",
			SMBDIRECT_DEBUG_ERR_PTR(ret));
		goto get_recv_io_failed;
	}
	/* negotiate-specific completion handler for this one receive */
	recv_io->cqe.done = smbdirect_connect_negotiate_recv_done;

	send_io = smbdirect_connection_alloc_send_io(sc);
	if (IS_ERR(send_io)) {
		ret = PTR_ERR(send_io);
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"smbdirect_connection_alloc_send_io() failed %1pe\n",
			SMBDIRECT_DEBUG_ERR_PTR(ret));
		goto alloc_send_io_failed;
	}
	send_io->cqe.done = smbdirect_connect_negotiate_send_done;

	/* build the negotiate request from our configured parameters */
	nreq = (struct smbdirect_negotiate_req *)send_io->packet;
	nreq->min_version = cpu_to_le16(SMBDIRECT_V1);
	nreq->max_version = cpu_to_le16(SMBDIRECT_V1);
	nreq->reserved = 0;
	nreq->credits_requested = cpu_to_le16(sp->send_credit_target);
	nreq->preferred_send_size = cpu_to_le32(sp->max_send_size);
	nreq->max_receive_size = cpu_to_le32(sp->max_recv_size);
	nreq->max_fragmented_size = cpu_to_le32(sp->max_fragmented_recv_size);

	smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO,
		"ReqOut: %s%x, %s%x, %s%u, %s%u, %s%u, %s%u\n",
		"MinVersion=0x",
		le16_to_cpu(nreq->min_version),
		"MaxVersion=0x",
		le16_to_cpu(nreq->max_version),
		"CreditsRequested=",
		le16_to_cpu(nreq->credits_requested),
		"PreferredSendSize=",
		le32_to_cpu(nreq->preferred_send_size),
		"MaxRecvSize=",
		le32_to_cpu(nreq->max_receive_size),
		"MaxFragmentedSize=",
		le32_to_cpu(nreq->max_fragmented_size));

	send_io->sge[0].addr = ib_dma_map_single(sc->ib.dev,
						 nreq,
						 sizeof(*nreq),
						 DMA_TO_DEVICE);
	ret = ib_dma_mapping_error(sc->ib.dev, send_io->sge[0].addr);
	if (ret) {
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"ib_dma_mapping_error() failed %1pe\n",
			SMBDIRECT_DEBUG_ERR_PTR(ret));
		goto dma_mapping_failed;
	}

	send_io->sge[0].length = sizeof(*nreq);
	send_io->sge[0].lkey = sc->ib.pd->local_dma_lkey;
	send_io->num_sge = 1;

	ib_dma_sync_single_for_device(sc->ib.dev,
				      send_io->sge[0].addr,
				      send_io->sge[0].length,
				      DMA_TO_DEVICE);

	smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO,
		"sge addr=0x%llx length=%u lkey=0x%x\n",
		send_io->sge[0].addr,
		send_io->sge[0].length,
		send_io->sge[0].lkey);

	/*
	 * Now post the recv_io buffer in order to get
	 * the negotiate response
	 */
	sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP;
	ret = smbdirect_connection_post_recv_io(recv_io);
	if (ret) {
		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
			"smbdirect_connection_post_recv_io() failed %1pe\n",
			SMBDIRECT_DEBUG_ERR_PTR(ret));
		goto post_recv_io_failed;
	}

	send_io->wr.next = NULL;
	send_io->wr.wr_cqe = &send_io->cqe;
	send_io->wr.sg_list = send_io->sge;
	send_io->wr.num_sge = send_io->num_sge;
	send_io->wr.opcode = IB_WR_SEND;
	send_io->wr.send_flags = IB_SEND_SIGNALED;

	ret = smbdirect_connection_post_send_wr(sc, &send_io->wr);
	if (ret) {
		/* if we reach here, post send failed */
		smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR,
			"smbdirect_connection_post_send_wr() failed %1pe\n",
			SMBDIRECT_DEBUG_ERR_PTR(ret));
		goto post_send_wr_failed;
	}

	/*
	 * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING
	 * so that the timer will cause a disconnect.
	 */
	sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING;
	mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work,
			 msecs_to_jiffies(sp->negotiate_timeout_msec));

	return 0;

post_send_wr_failed:
	/*
	 * ib_dma_unmap_single is called in
	 * smbdirect_connection_free_send_io()
	 */
	smbdirect_connection_free_send_io(send_io);
	/*
	 * recv_io is given to the rdma layer,
	 * we should not put it even on error
	 * nor call smbdirect_connection_destroy_mem_pools()
	 * it will be cleaned up during disconnect.
	 */
	return ret;

post_recv_io_failed:
	/*
	 * ib_dma_unmap_single is called in
	 * smbdirect_connection_free_send_io()
	 */
dma_mapping_failed:
	smbdirect_connection_free_send_io(send_io);

alloc_send_io_failed:
	smbdirect_connection_put_recv_io(recv_io);

get_recv_io_failed:
	smbdirect_connection_destroy_mem_pools(sc);

create_mem_pools_failed:
	return ret;
}
549 
550 static void smbdirect_connect_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc)
551 {
552 	struct smbdirect_send_io *send_io =
553 		container_of(wc->wr_cqe, struct smbdirect_send_io, cqe);
554 	struct smbdirect_socket *sc = send_io->socket;
555 
556 	smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO,
557 		"smbdirect_send_io completed. status='%s (%d)', opcode=%d\n",
558 		ib_wc_status_msg(wc->status), wc->status, wc->opcode);
559 
560 	/* Note this frees wc->wr_cqe, but not wc */
561 	smbdirect_connection_free_send_io(send_io);
562 	atomic_dec(&sc->send_io.pending.count);
563 
564 	if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_SEND))) {
565 		if (wc->status != IB_WC_WR_FLUSH_ERR)
566 			smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR,
567 				"wc->status=%s (%d) wc->opcode=%d\n",
568 				ib_wc_status_msg(wc->status), wc->status, wc->opcode);
569 		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
570 		return;
571 	}
572 }
573 
574 static void smbdirect_connect_negotiate_recv_work(struct work_struct *work);
575 
/*
 * Receive completion for the negotiate response.
 *
 * Runs in completion context, so only quick checks happen here; the
 * actual parsing of the response is deferred to
 * smbdirect_connect_negotiate_recv_work() on the connect workqueue.
 */
static void smbdirect_connect_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbdirect_recv_io *recv_io =
		container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe);
	struct smbdirect_socket *sc = recv_io->socket;
	unsigned long flags;

	if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_RECV))) {
		/* flush errors during teardown are expected, don't log those */
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR,
				"wc->status=%s (%d) wc->opcode=%d\n",
				ib_wc_status_msg(wc->status), wc->status, wc->opcode);
		goto error;
	}

	smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_INFO,
		"smbdirect_recv_io completed. status='%s (%d)', opcode=%d\n",
		ib_wc_status_msg(wc->status), wc->status, wc->opcode);

	/*
	 * This is an internal error!
	 */
	if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REP))
		goto error;

	/*
	 * Don't reset the timer to the keepalive interval here,
	 * this will be done in smbdirect_connect_negotiate_recv_work().
	 */

	ib_dma_sync_single_for_cpu(sc->ib.dev,
				   recv_io->sge.addr,
				   recv_io->sge.length,
				   DMA_FROM_DEVICE);

	/*
	 * Only remember recv_io if it has enough bytes,
	 * this gives smbdirect_connect_negotiate_recv_work() enough
	 * information in order to disconnect if it was not
	 * valid.
	 */
	sc->recv_io.reassembly.full_packet_received = true;
	if (wc->byte_len >= sizeof(struct smbdirect_negotiate_resp))
		smbdirect_connection_reassembly_append_recv_io(sc, recv_io, 0);
	else
		smbdirect_connection_put_recv_io(recv_io);

	/*
	 * We continue via the workqueue as we may have
	 * complex work that might sleep.
	 *
	 * So we defer further processing of the negotiation
	 * to smbdirect_connect_negotiate_recv_work().
	 */
	spin_lock_irqsave(&sc->connect.lock, flags);
	if (!sc->first_error) {
		INIT_WORK(&sc->connect.work, smbdirect_connect_negotiate_recv_work);
		if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)
			queue_work(sc->workqueues.connect, &sc->connect.work);
	}
	spin_unlock_irqrestore(&sc->connect.lock, flags);

	return;

error:
	/*
	 * recv_io.posted.refill_work is still disabled,
	 * so smbdirect_connection_put_recv_io() won't
	 * start it.
	 */
	smbdirect_connection_put_recv_io(recv_io);
	smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
}
649 
650 static void smbdirect_connect_negotiate_recv_work(struct work_struct *work)
651 {
652 	struct smbdirect_socket *sc =
653 		container_of(work, struct smbdirect_socket, connect.work);
654 	struct smbdirect_socket_parameters *sp = &sc->parameters;
655 	struct smbdirect_recv_io *recv_io;
656 	struct smbdirect_negotiate_resp *nrep;
657 	unsigned long flags;
658 	u16 negotiated_version;
659 	u16 credits_requested;
660 	u16 credits_granted;
661 	u32 status;
662 	u32 max_readwrite_size;
663 	u32 preferred_send_size;
664 	u32 max_receive_size;
665 	u32 max_fragmented_size;
666 	int posted;
667 	int ret;
668 
669 	if (sc->first_error)
670 		return;
671 
672 	/*
673 	 * make sure we won't start again...
674 	 */
675 	disable_work(work);
676 
677 	/*
678 	 * Reset timer to the keepalive interval in
679 	 * order to trigger our next keepalive message.
680 	 */
681 	sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE;
682 	mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work,
683 			 msecs_to_jiffies(sp->keepalive_interval_msec));
684 
685 	/*
686 	 * If smbdirect_connect_negotiate_recv_done() detected an
687 	 * invalid request we want to disconnect.
688 	 */
689 	recv_io = smbdirect_connection_reassembly_first_recv_io(sc);
690 	if (!recv_io) {
691 		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
692 		return;
693 	}
694 	spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
695 	sc->recv_io.reassembly.queue_length--;
696 	list_del(&recv_io->list);
697 	spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
698 	smbdirect_connection_put_recv_io(recv_io);
699 
700 	if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_NEGOTIATE_RUNNING))
701 		return;
702 
703 	/*
704 	 * Note recv_io is already part of the free list,
705 	 * as we just called smbdirect_connection_put_recv_io(),
706 	 * but it won't be reused before we call
707 	 * smbdirect_connection_recv_io_refill() below.
708 	 */
709 
710 	nrep = (struct smbdirect_negotiate_resp *)recv_io->packet;
711 	negotiated_version = le16_to_cpu(nrep->negotiated_version);
712 	credits_requested = le16_to_cpu(nrep->credits_requested);
713 	credits_granted = le16_to_cpu(nrep->credits_granted);
714 	status = le32_to_cpu(nrep->status);
715 	max_readwrite_size = le32_to_cpu(nrep->max_readwrite_size);
716 	preferred_send_size = le32_to_cpu(nrep->preferred_send_size);
717 	max_receive_size = le32_to_cpu(nrep->max_receive_size);
718 	max_fragmented_size = le32_to_cpu(nrep->max_fragmented_size);
719 
720 	smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO,
721 		"RepIn: %s%x, %s%x, %s%x, %s%u, %s%u, %s%x, %s%u, %s%u, %s%u, %s%u\n",
722 		"MinVersion=0x",
723 		le16_to_cpu(nrep->min_version),
724 		"MaxVersion=0x",
725 		le16_to_cpu(nrep->max_version),
726 		"NegotiatedVersion=0x",
727 		le16_to_cpu(nrep->negotiated_version),
728 		"CreditsRequested=",
729 		le16_to_cpu(nrep->credits_requested),
730 		"CreditsGranted=",
731 		le16_to_cpu(nrep->credits_granted),
732 		"Status=0x",
733 		le32_to_cpu(nrep->status),
734 		"MaxReadWriteSize=",
735 		le32_to_cpu(nrep->max_readwrite_size),
736 		"PreferredSendSize=",
737 		le32_to_cpu(nrep->preferred_send_size),
738 		"MaxRecvSize=",
739 		le32_to_cpu(nrep->max_receive_size),
740 		"MaxFragmentedSize=",
741 		le32_to_cpu(nrep->max_fragmented_size));
742 
743 	if (negotiated_version != SMBDIRECT_V1) {
744 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
745 			"invalid: negotiated_version=0x%x\n",
746 			negotiated_version);
747 		smbdirect_socket_schedule_cleanup(sc, -ECONNREFUSED);
748 		return;
749 	}
750 
751 	if (status != le32_to_cpu(STATUS_SUCCESS)) {
752 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
753 			"invalid: status=0x%x != 0x0\n",
754 			status);
755 		smbdirect_socket_schedule_cleanup(sc, -ECONNREFUSED);
756 		return;
757 	}
758 
759 	if (max_receive_size < SMBDIRECT_MIN_RECEIVE_SIZE) {
760 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
761 			"invalid: max_receive_size=%u < %u\n",
762 			max_receive_size,
763 			SMBDIRECT_MIN_RECEIVE_SIZE);
764 		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
765 		return;
766 	}
767 
768 	if (max_fragmented_size < SMBDIRECT_MIN_FRAGMENTED_SIZE) {
769 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
770 			"invalid: max_fragmented_size=%u < %u\n",
771 			max_fragmented_size,
772 			SMBDIRECT_MIN_FRAGMENTED_SIZE);
773 		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
774 		return;
775 	}
776 
777 	if (credits_granted == 0) {
778 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
779 			"invalid: credits_granted == 0\n");
780 		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
781 		return;
782 	}
783 
784 	if (credits_requested == 0) {
785 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
786 			"invalid: credits_requested == 0\n");
787 		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
788 		return;
789 	}
790 
791 	if (preferred_send_size > sp->max_recv_size) {
792 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
793 			"invalid: preferred_send_size=%u > max_recv_size=%u\n",
794 			preferred_send_size,
795 			sp->max_recv_size);
796 		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
797 		return;
798 	}
799 
800 	/*
801 	 * We take the value from the peer, which is checked to be higher than 0,
802 	 * but we limit it to the max value we support in order to have
803 	 * the main logic simpler.
804 	 */
805 	sc->recv_io.credits.target = credits_requested;
806 	sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target,
807 					   sp->recv_credit_max);
808 
809 	/*
810 	 * At least the value of SMBDIRECT_MIN_RECEIVE_SIZE is used.
811 	 */
812 	sp->max_recv_size = min_t(u32, sp->max_recv_size, preferred_send_size);
813 	sp->max_recv_size = max_t(u32, sp->max_recv_size, SMBDIRECT_MIN_RECEIVE_SIZE);
814 
815 	/*
816 	 * We already sent our sp->max_fragmented_recv_size
817 	 * to the peer, so we can't lower it here any more.
818 	 *
819 	 * TODO: but if the peer lowered sp->max_recv_size
820 	 * we will have to adjust our number of buffers.
821 	 *
822 	 * But for now we keep it as the cifs.ko code
823 	 * worked before.
824 	 */
825 
826 	/*
827 	 * Note nrep->max_receive_size was already checked against
828 	 * SMBDIRECT_MIN_RECEIVE_SIZE above.
829 	 */
830 	sp->max_send_size = min_t(u32, sp->max_send_size, max_receive_size);
831 
832 	/*
833 	 * Make sure the resulting max_frmr_depth is at least 1,
834 	 * which means max_read_write_size needs to be at least PAGE_SIZE.
835 	 */
836 	sp->max_read_write_size = min_t(u32, sp->max_frmr_depth * PAGE_SIZE,
837 					max_readwrite_size);
838 	if (sp->max_read_write_size < PAGE_SIZE) {
839 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
840 			"invalid: max_readwrite_size=%u < PAGE_SIZE(%lu)\n",
841 			max_readwrite_size,
842 			PAGE_SIZE);
843 		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
844 		return;
845 	}
846 	sp->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE;
847 
848 	/*
849 	 * Note nrep->credits_granted was already checked against 0 above.
850 	 */
851 	atomic_set(&sc->send_io.credits.count, credits_granted);
852 
853 	/*
854 	 * Note nrep->max_fragmented_size was already checked against
855 	 * SMBDIRECT_MIN_FRAGMENTED_SIZE above.
856 	 */
857 	sp->max_fragmented_send_size = max_fragmented_size;
858 
859 	ret = smbdirect_connection_create_mr_list(sc);
860 	if (ret) {
861 		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
862 			"smbdirect_connection_create_mr_list() failed %1pe\n",
863 			SMBDIRECT_DEBUG_ERR_PTR(ret));
864 		smbdirect_socket_schedule_cleanup(sc, ret);
865 		return;
866 	}
867 
868 	/*
869 	 * Prepare for receiving data_transfer messages
870 	 */
871 	sc->recv_io.reassembly.full_packet_received = true;
872 	sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER;
873 	list_for_each_entry(recv_io, &sc->recv_io.free.list, list)
874 		recv_io->cqe.done = smbdirect_connection_recv_io_done;
875 	recv_io = NULL;
876 
877 	/*
878 	 * We should at least post 1 smbdirect_recv_io!
879 	 */
880 	posted = smbdirect_connection_recv_io_refill(sc);
881 	if (posted < 1) {
882 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
883 			"smbdirect_connection_recv_io_refill() failed %1pe\n",
884 			SMBDIRECT_DEBUG_ERR_PTR(ret));
885 		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
886 		return;
887 	}
888 
889 	/*
890 	 * smbdirect_connection_negotiation_done()
891 	 * will setup all required things and wake up
892 	 * the waiter.
893 	 */
894 	smbdirect_connection_negotiation_done(sc);
895 }
896 
897 int smbdirect_connect_sync(struct smbdirect_socket *sc,
898 			   const struct sockaddr *dst)
899 {
900 	int ret;
901 
902 	ret = smbdirect_connect(sc, dst);
903 	if (ret) {
904 		smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
905 			"smbdirect_connect(%pISpsfc) failed %1pe\n",
906 			dst, SMBDIRECT_DEBUG_ERR_PTR(ret));
907 		return ret;
908 	}
909 
910 	ret = smbdirect_connection_wait_for_connected(sc);
911 	if (ret) {
912 		int lvl = SMBDIRECT_LOG_ERR;
913 
914 		if (ret == -ENODEV)
915 			lvl = SMBDIRECT_LOG_INFO;
916 
917 		smbdirect_log_rdma_event(sc, lvl,
918 			"wait for smbdirect_connect(%pISpsfc) failed %1pe\n",
919 			dst, SMBDIRECT_DEBUG_ERR_PTR(ret));
920 		return ret;
921 	}
922 
923 	return 0;
924 }
925 __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect_sync);
926