1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017, Microsoft Corporation.
4 * Copyright (C) 2018, LG Electronics.
5 *
6 * Author(s): Long Li <longli@microsoft.com>,
7 * Hyunchul Lee <hyc.lee@gmail.com>
8 */
9
10 #define SUBMOD_NAME "smb_direct"
11
12 #include <linux/kthread.h>
13 #include <linux/list.h>
14 #include <linux/mempool.h>
15 #include <linux/highmem.h>
16 #include <linux/scatterlist.h>
17 #include <linux/string_choices.h>
18 #include <rdma/ib_verbs.h>
19 #include <rdma/rdma_cm.h>
20 #include <rdma/rw.h>
21
22 #include "glob.h"
23 #include "connection.h"
24 #include "smb_common.h"
25 #include "../common/smb2status.h"
26 #include "../common/smbdirect/smbdirect.h"
27 #include "../common/smbdirect/smbdirect_pdu.h"
28 #include "../common/smbdirect/smbdirect_socket.h"
29 #include "transport_rdma.h"
30
31 #define SMB_DIRECT_PORT_IWARP 5445
32 #define SMB_DIRECT_PORT_INFINIBAND 445
33
34 #define SMB_DIRECT_VERSION_LE cpu_to_le16(SMBDIRECT_V1)
35
36 /* SMB_DIRECT negotiation timeout (for the server) in seconds */
37 #define SMB_DIRECT_NEGOTIATE_TIMEOUT 5
38
39 /* The timeout to wait for a keepalive message from peer in seconds */
40 #define SMB_DIRECT_KEEPALIVE_SEND_INTERVAL 120
41
42 /* The timeout to wait for a keepalive message from peer in seconds */
43 #define SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT 5
44
45 /*
46 * Default maximum number of RDMA read/write outstanding on this connection
47 * This value is possibly decreased during QP creation on hardware limit
48 */
49 #define SMB_DIRECT_CM_INITIATOR_DEPTH 8
50
51 /* Maximum number of retries on data transfer operations */
52 #define SMB_DIRECT_CM_RETRY 6
53 /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
54 #define SMB_DIRECT_CM_RNR_RETRY 0
55
56 /*
57 * User configurable initial values per SMB_DIRECT transport connection
58 * as defined in [MS-SMBD] 3.1.1.1
59 * Those may change after a SMB_DIRECT negotiation
60 */
61
62 /* Set 445 port to SMB Direct port by default */
63 static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
64
65 /* The local peer's maximum number of credits to grant to the peer */
66 static int smb_direct_receive_credit_max = 255;
67
68 /* The remote peer's credit request of local peer */
69 static int smb_direct_send_credit_target = 255;
70
71 /* The maximum single message size can be sent to remote peer */
72 static int smb_direct_max_send_size = 1364;
73
74 /* The maximum fragmented upper-layer payload receive size supported */
75 static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
76
77 /* The maximum single-message size which can be received */
78 static int smb_direct_max_receive_size = 1364;
79
80 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
81
82 static LIST_HEAD(smb_direct_device_list);
83 static DEFINE_RWLOCK(smb_direct_device_lock);
84
85 struct smb_direct_device {
86 struct ib_device *ib_dev;
87 struct list_head list;
88 };
89
90 static struct smb_direct_listener {
91 struct rdma_cm_id *cm_id;
92 } smb_direct_listener;
93
94 static struct workqueue_struct *smb_direct_wq;
95
96 struct smb_direct_transport {
97 struct ksmbd_transport transport;
98
99 struct smbdirect_socket socket;
100 };
101
102 #define KSMBD_TRANS(t) (&(t)->transport)
103 #define SMBD_TRANS(t) (container_of(t, \
104 struct smb_direct_transport, transport))
105
106 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
107
init_smbd_max_io_size(unsigned int sz)108 void init_smbd_max_io_size(unsigned int sz)
109 {
110 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
111 smb_direct_max_read_write_size = sz;
112 }
113
get_smbd_max_read_write_size(struct ksmbd_transport * kt)114 unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt)
115 {
116 struct smb_direct_transport *t;
117 struct smbdirect_socket *sc;
118 struct smbdirect_socket_parameters *sp;
119
120 if (kt->ops != &ksmbd_smb_direct_transport_ops)
121 return 0;
122
123 t = SMBD_TRANS(kt);
124 sc = &t->socket;
125 sp = &sc->parameters;
126
127 return sp->max_read_write_size;
128 }
129
get_buf_page_count(void * buf,int size)130 static inline int get_buf_page_count(void *buf, int size)
131 {
132 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
133 (uintptr_t)buf / PAGE_SIZE;
134 }
135
136 static void smb_direct_destroy_pools(struct smbdirect_socket *sc);
137 static void smb_direct_post_recv_credits(struct work_struct *work);
138 static int smb_direct_post_send_data(struct smbdirect_socket *sc,
139 struct smbdirect_send_batch *send_ctx,
140 struct kvec *iov, int niov,
141 int remaining_data_length);
142
143 static inline void
smbdirect_recv_io_payload(struct smbdirect_recv_io * recvmsg)144 *smbdirect_recv_io_payload(struct smbdirect_recv_io *recvmsg)
145 {
146 return (void *)recvmsg->packet;
147 }
148
149 static struct
get_free_recvmsg(struct smbdirect_socket * sc)150 smbdirect_recv_io *get_free_recvmsg(struct smbdirect_socket *sc)
151 {
152 struct smbdirect_recv_io *recvmsg = NULL;
153 unsigned long flags;
154
155 spin_lock_irqsave(&sc->recv_io.free.lock, flags);
156 if (!list_empty(&sc->recv_io.free.list)) {
157 recvmsg = list_first_entry(&sc->recv_io.free.list,
158 struct smbdirect_recv_io,
159 list);
160 list_del(&recvmsg->list);
161 }
162 spin_unlock_irqrestore(&sc->recv_io.free.lock, flags);
163 return recvmsg;
164 }
165
put_recvmsg(struct smbdirect_socket * sc,struct smbdirect_recv_io * recvmsg)166 static void put_recvmsg(struct smbdirect_socket *sc,
167 struct smbdirect_recv_io *recvmsg)
168 {
169 unsigned long flags;
170
171 if (likely(recvmsg->sge.length != 0)) {
172 ib_dma_unmap_single(sc->ib.dev,
173 recvmsg->sge.addr,
174 recvmsg->sge.length,
175 DMA_FROM_DEVICE);
176 recvmsg->sge.length = 0;
177 }
178
179 spin_lock_irqsave(&sc->recv_io.free.lock, flags);
180 list_add(&recvmsg->list, &sc->recv_io.free.list);
181 spin_unlock_irqrestore(&sc->recv_io.free.lock, flags);
182
183 queue_work(sc->workqueue, &sc->recv_io.posted.refill_work);
184 }
185
enqueue_reassembly(struct smbdirect_socket * sc,struct smbdirect_recv_io * recvmsg,int data_length)186 static void enqueue_reassembly(struct smbdirect_socket *sc,
187 struct smbdirect_recv_io *recvmsg,
188 int data_length)
189 {
190 unsigned long flags;
191
192 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
193 list_add_tail(&recvmsg->list, &sc->recv_io.reassembly.list);
194 sc->recv_io.reassembly.queue_length++;
195 /*
196 * Make sure reassembly_data_length is updated after list and
197 * reassembly_queue_length are updated. On the dequeue side
198 * reassembly_data_length is checked without a lock to determine
199 * if reassembly_queue_length and list is up to date
200 */
201 virt_wmb();
202 sc->recv_io.reassembly.data_length += data_length;
203 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
204 }
205
get_first_reassembly(struct smbdirect_socket * sc)206 static struct smbdirect_recv_io *get_first_reassembly(struct smbdirect_socket *sc)
207 {
208 if (!list_empty(&sc->recv_io.reassembly.list))
209 return list_first_entry(&sc->recv_io.reassembly.list,
210 struct smbdirect_recv_io, list);
211 else
212 return NULL;
213 }
214
smb_direct_disconnect_wake_up_all(struct smbdirect_socket * sc)215 static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc)
216 {
217 /*
218 * Wake up all waiters in all wait queues
219 * in order to notice the broken connection.
220 */
221 wake_up_all(&sc->status_wait);
222 wake_up_all(&sc->send_io.lcredits.wait_queue);
223 wake_up_all(&sc->send_io.credits.wait_queue);
224 wake_up_all(&sc->send_io.pending.zero_wait_queue);
225 wake_up_all(&sc->recv_io.reassembly.wait_queue);
226 wake_up_all(&sc->rw_io.credits.wait_queue);
227 }
228
smb_direct_disconnect_rdma_work(struct work_struct * work)229 static void smb_direct_disconnect_rdma_work(struct work_struct *work)
230 {
231 struct smbdirect_socket *sc =
232 container_of(work, struct smbdirect_socket, disconnect_work);
233
234 /*
235 * make sure this and other work is not queued again
236 * but here we don't block and avoid
237 * disable[_delayed]_work_sync()
238 */
239 disable_work(&sc->disconnect_work);
240 disable_work(&sc->recv_io.posted.refill_work);
241 disable_delayed_work(&sc->idle.timer_work);
242 disable_work(&sc->idle.immediate_work);
243
244 if (sc->first_error == 0)
245 sc->first_error = -ECONNABORTED;
246
247 switch (sc->status) {
248 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
249 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
250 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
251 case SMBDIRECT_SOCKET_CONNECTED:
252 case SMBDIRECT_SOCKET_ERROR:
253 sc->status = SMBDIRECT_SOCKET_DISCONNECTING;
254 rdma_disconnect(sc->rdma.cm_id);
255 break;
256
257 case SMBDIRECT_SOCKET_CREATED:
258 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
259 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
260 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
261 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
262 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
263 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
264 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
265 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
266 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
267 /*
268 * rdma_accept() never reached
269 * RDMA_CM_EVENT_ESTABLISHED
270 */
271 sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
272 break;
273
274 case SMBDIRECT_SOCKET_DISCONNECTING:
275 case SMBDIRECT_SOCKET_DISCONNECTED:
276 case SMBDIRECT_SOCKET_DESTROYED:
277 break;
278 }
279
280 /*
281 * Wake up all waiters in all wait queues
282 * in order to notice the broken connection.
283 */
284 smb_direct_disconnect_wake_up_all(sc);
285 }
286
287 static void
smb_direct_disconnect_rdma_connection(struct smbdirect_socket * sc)288 smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc)
289 {
290 /*
291 * make sure other work (than disconnect_work) is
292 * not queued again but here we don't block and avoid
293 * disable[_delayed]_work_sync()
294 */
295 disable_work(&sc->recv_io.posted.refill_work);
296 disable_work(&sc->idle.immediate_work);
297 disable_delayed_work(&sc->idle.timer_work);
298
299 if (sc->first_error == 0)
300 sc->first_error = -ECONNABORTED;
301
302 switch (sc->status) {
303 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
304 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
305 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
306 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
307 case SMBDIRECT_SOCKET_ERROR:
308 case SMBDIRECT_SOCKET_DISCONNECTING:
309 case SMBDIRECT_SOCKET_DISCONNECTED:
310 case SMBDIRECT_SOCKET_DESTROYED:
311 /*
312 * Keep the current error status
313 */
314 break;
315
316 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
317 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
318 sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED;
319 break;
320
321 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
322 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
323 sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED;
324 break;
325
326 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
327 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
328 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED;
329 break;
330
331 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
332 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
333 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED;
334 break;
335
336 case SMBDIRECT_SOCKET_CREATED:
337 sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
338 break;
339
340 case SMBDIRECT_SOCKET_CONNECTED:
341 sc->status = SMBDIRECT_SOCKET_ERROR;
342 break;
343 }
344
345 /*
346 * Wake up all waiters in all wait queues
347 * in order to notice the broken connection.
348 */
349 smb_direct_disconnect_wake_up_all(sc);
350
351 queue_work(sc->workqueue, &sc->disconnect_work);
352 }
353
smb_direct_send_immediate_work(struct work_struct * work)354 static void smb_direct_send_immediate_work(struct work_struct *work)
355 {
356 struct smbdirect_socket *sc =
357 container_of(work, struct smbdirect_socket, idle.immediate_work);
358
359 if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
360 return;
361
362 smb_direct_post_send_data(sc, NULL, NULL, 0, 0);
363 }
364
smb_direct_idle_connection_timer(struct work_struct * work)365 static void smb_direct_idle_connection_timer(struct work_struct *work)
366 {
367 struct smbdirect_socket *sc =
368 container_of(work, struct smbdirect_socket, idle.timer_work.work);
369 struct smbdirect_socket_parameters *sp = &sc->parameters;
370
371 if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) {
372 smb_direct_disconnect_rdma_connection(sc);
373 return;
374 }
375
376 if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
377 return;
378
379 /*
380 * Now use the keepalive timeout (instead of keepalive interval)
381 * in order to wait for a response
382 */
383 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING;
384 mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
385 msecs_to_jiffies(sp->keepalive_timeout_msec));
386 queue_work(sc->workqueue, &sc->idle.immediate_work);
387 }
388
alloc_transport(struct rdma_cm_id * cm_id)389 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
390 {
391 struct smb_direct_transport *t;
392 struct smbdirect_socket *sc;
393 struct smbdirect_socket_parameters *sp;
394 struct ksmbd_conn *conn;
395
396 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP);
397 if (!t)
398 return NULL;
399 sc = &t->socket;
400 smbdirect_socket_init(sc);
401 sp = &sc->parameters;
402
403 sc->workqueue = smb_direct_wq;
404
405 INIT_WORK(&sc->disconnect_work, smb_direct_disconnect_rdma_work);
406
407 sp->negotiate_timeout_msec = SMB_DIRECT_NEGOTIATE_TIMEOUT * 1000;
408 sp->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH;
409 sp->responder_resources = 1;
410 sp->recv_credit_max = smb_direct_receive_credit_max;
411 sp->send_credit_target = smb_direct_send_credit_target;
412 sp->max_send_size = smb_direct_max_send_size;
413 sp->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
414 sp->max_recv_size = smb_direct_max_receive_size;
415 sp->max_read_write_size = smb_direct_max_read_write_size;
416 sp->keepalive_interval_msec = SMB_DIRECT_KEEPALIVE_SEND_INTERVAL * 1000;
417 sp->keepalive_timeout_msec = SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT * 1000;
418
419 sc->rdma.cm_id = cm_id;
420 cm_id->context = sc;
421
422 sc->ib.dev = sc->rdma.cm_id->device;
423
424 INIT_DELAYED_WORK(&sc->idle.timer_work, smb_direct_idle_connection_timer);
425
426 conn = ksmbd_conn_alloc();
427 if (!conn)
428 goto err;
429
430 down_write(&conn_list_lock);
431 hash_add(conn_list, &conn->hlist, 0);
432 up_write(&conn_list_lock);
433
434 conn->transport = KSMBD_TRANS(t);
435 KSMBD_TRANS(t)->conn = conn;
436 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
437 return t;
438 err:
439 kfree(t);
440 return NULL;
441 }
442
smb_direct_free_transport(struct ksmbd_transport * kt)443 static void smb_direct_free_transport(struct ksmbd_transport *kt)
444 {
445 kfree(SMBD_TRANS(kt));
446 }
447
free_transport(struct smb_direct_transport * t)448 static void free_transport(struct smb_direct_transport *t)
449 {
450 struct smbdirect_socket *sc = &t->socket;
451 struct smbdirect_recv_io *recvmsg;
452
453 disable_work_sync(&sc->disconnect_work);
454 if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING)
455 smb_direct_disconnect_rdma_work(&sc->disconnect_work);
456 if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED)
457 wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
458
459 /*
460 * Wake up all waiters in all wait queues
461 * in order to notice the broken connection.
462 *
463 * Most likely this was already called via
464 * smb_direct_disconnect_rdma_work(), but call it again...
465 */
466 smb_direct_disconnect_wake_up_all(sc);
467
468 disable_work_sync(&sc->recv_io.posted.refill_work);
469 disable_delayed_work_sync(&sc->idle.timer_work);
470 disable_work_sync(&sc->idle.immediate_work);
471
472 if (sc->rdma.cm_id)
473 rdma_lock_handler(sc->rdma.cm_id);
474
475 if (sc->ib.qp) {
476 ib_drain_qp(sc->ib.qp);
477 sc->ib.qp = NULL;
478 rdma_destroy_qp(sc->rdma.cm_id);
479 }
480
481 ksmbd_debug(RDMA, "drain the reassembly queue\n");
482 do {
483 unsigned long flags;
484
485 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
486 recvmsg = get_first_reassembly(sc);
487 if (recvmsg) {
488 list_del(&recvmsg->list);
489 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
490 put_recvmsg(sc, recvmsg);
491 } else {
492 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
493 }
494 } while (recvmsg);
495 sc->recv_io.reassembly.data_length = 0;
496
497 if (sc->ib.send_cq)
498 ib_free_cq(sc->ib.send_cq);
499 if (sc->ib.recv_cq)
500 ib_free_cq(sc->ib.recv_cq);
501 if (sc->ib.pd)
502 ib_dealloc_pd(sc->ib.pd);
503 if (sc->rdma.cm_id) {
504 rdma_unlock_handler(sc->rdma.cm_id);
505 rdma_destroy_id(sc->rdma.cm_id);
506 }
507
508 smb_direct_destroy_pools(sc);
509 ksmbd_conn_free(KSMBD_TRANS(t)->conn);
510 }
511
512 static struct smbdirect_send_io
smb_direct_alloc_sendmsg(struct smbdirect_socket * sc)513 *smb_direct_alloc_sendmsg(struct smbdirect_socket *sc)
514 {
515 struct smbdirect_send_io *msg;
516
517 msg = mempool_alloc(sc->send_io.mem.pool, KSMBD_DEFAULT_GFP);
518 if (!msg)
519 return ERR_PTR(-ENOMEM);
520 msg->socket = sc;
521 INIT_LIST_HEAD(&msg->sibling_list);
522 msg->num_sge = 0;
523 return msg;
524 }
525
smb_direct_free_sendmsg(struct smbdirect_socket * sc,struct smbdirect_send_io * msg)526 static void smb_direct_free_sendmsg(struct smbdirect_socket *sc,
527 struct smbdirect_send_io *msg)
528 {
529 int i;
530
531 /*
532 * The list needs to be empty!
533 * The caller should take care of it.
534 */
535 WARN_ON_ONCE(!list_empty(&msg->sibling_list));
536
537 if (msg->num_sge > 0) {
538 ib_dma_unmap_single(sc->ib.dev,
539 msg->sge[0].addr, msg->sge[0].length,
540 DMA_TO_DEVICE);
541 for (i = 1; i < msg->num_sge; i++)
542 ib_dma_unmap_page(sc->ib.dev,
543 msg->sge[i].addr, msg->sge[i].length,
544 DMA_TO_DEVICE);
545 }
546 mempool_free(msg, sc->send_io.mem.pool);
547 }
548
smb_direct_check_recvmsg(struct smbdirect_recv_io * recvmsg)549 static int smb_direct_check_recvmsg(struct smbdirect_recv_io *recvmsg)
550 {
551 struct smbdirect_socket *sc = recvmsg->socket;
552
553 switch (sc->recv_io.expected) {
554 case SMBDIRECT_EXPECT_DATA_TRANSFER: {
555 struct smbdirect_data_transfer *req =
556 (struct smbdirect_data_transfer *)recvmsg->packet;
557 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
558 + le32_to_cpu(req->data_offset));
559 ksmbd_debug(RDMA,
560 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
561 le16_to_cpu(req->credits_granted),
562 le16_to_cpu(req->credits_requested),
563 req->data_length, req->remaining_data_length,
564 hdr->ProtocolId, hdr->Command);
565 return 0;
566 }
567 case SMBDIRECT_EXPECT_NEGOTIATE_REQ: {
568 struct smbdirect_negotiate_req *req =
569 (struct smbdirect_negotiate_req *)recvmsg->packet;
570 ksmbd_debug(RDMA,
571 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
572 le16_to_cpu(req->min_version),
573 le16_to_cpu(req->max_version),
574 le16_to_cpu(req->credits_requested),
575 le32_to_cpu(req->preferred_send_size),
576 le32_to_cpu(req->max_receive_size),
577 le32_to_cpu(req->max_fragmented_size));
578 if (le16_to_cpu(req->min_version) > 0x0100 ||
579 le16_to_cpu(req->max_version) < 0x0100)
580 return -EOPNOTSUPP;
581 if (le16_to_cpu(req->credits_requested) <= 0 ||
582 le32_to_cpu(req->max_receive_size) <= 128 ||
583 le32_to_cpu(req->max_fragmented_size) <=
584 128 * 1024)
585 return -ECONNABORTED;
586
587 return 0;
588 }
589 case SMBDIRECT_EXPECT_NEGOTIATE_REP:
590 /* client only */
591 break;
592 }
593
594 /* This is an internal error */
595 return -EINVAL;
596 }
597
recv_done(struct ib_cq * cq,struct ib_wc * wc)598 static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
599 {
600 struct smbdirect_recv_io *recvmsg;
601 struct smbdirect_socket *sc;
602 struct smbdirect_socket_parameters *sp;
603
604 recvmsg = container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe);
605 sc = recvmsg->socket;
606 sp = &sc->parameters;
607
608 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
609 put_recvmsg(sc, recvmsg);
610 if (wc->status != IB_WC_WR_FLUSH_ERR) {
611 pr_err("Recv error. status='%s (%d)' opcode=%d\n",
612 ib_wc_status_msg(wc->status), wc->status,
613 wc->opcode);
614 smb_direct_disconnect_rdma_connection(sc);
615 }
616 return;
617 }
618
619 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
620 ib_wc_status_msg(wc->status), wc->status,
621 wc->opcode);
622
623 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
624 recvmsg->sge.length, DMA_FROM_DEVICE);
625
626 /*
627 * Reset timer to the keepalive interval in
628 * order to trigger our next keepalive message.
629 */
630 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE;
631 mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
632 msecs_to_jiffies(sp->keepalive_interval_msec));
633
634 switch (sc->recv_io.expected) {
635 case SMBDIRECT_EXPECT_NEGOTIATE_REQ:
636 if (wc->byte_len < sizeof(struct smbdirect_negotiate_req)) {
637 put_recvmsg(sc, recvmsg);
638 smb_direct_disconnect_rdma_connection(sc);
639 return;
640 }
641 sc->recv_io.reassembly.full_packet_received = true;
642 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED);
643 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING;
644 enqueue_reassembly(sc, recvmsg, 0);
645 wake_up(&sc->status_wait);
646 return;
647 case SMBDIRECT_EXPECT_DATA_TRANSFER: {
648 struct smbdirect_data_transfer *data_transfer =
649 (struct smbdirect_data_transfer *)recvmsg->packet;
650 u32 remaining_data_length, data_offset, data_length;
651 u16 old_recv_credit_target;
652
653 if (wc->byte_len <
654 offsetof(struct smbdirect_data_transfer, padding)) {
655 put_recvmsg(sc, recvmsg);
656 smb_direct_disconnect_rdma_connection(sc);
657 return;
658 }
659
660 remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length);
661 data_length = le32_to_cpu(data_transfer->data_length);
662 data_offset = le32_to_cpu(data_transfer->data_offset);
663 if (wc->byte_len < data_offset ||
664 wc->byte_len < (u64)data_offset + data_length) {
665 put_recvmsg(sc, recvmsg);
666 smb_direct_disconnect_rdma_connection(sc);
667 return;
668 }
669 if (remaining_data_length > sp->max_fragmented_recv_size ||
670 data_length > sp->max_fragmented_recv_size ||
671 (u64)remaining_data_length + (u64)data_length >
672 (u64)sp->max_fragmented_recv_size) {
673 put_recvmsg(sc, recvmsg);
674 smb_direct_disconnect_rdma_connection(sc);
675 return;
676 }
677
678 if (data_length) {
679 if (sc->recv_io.reassembly.full_packet_received)
680 recvmsg->first_segment = true;
681
682 if (le32_to_cpu(data_transfer->remaining_data_length))
683 sc->recv_io.reassembly.full_packet_received = false;
684 else
685 sc->recv_io.reassembly.full_packet_received = true;
686 }
687
688 atomic_dec(&sc->recv_io.posted.count);
689 atomic_dec(&sc->recv_io.credits.count);
690
691 old_recv_credit_target = sc->recv_io.credits.target;
692 sc->recv_io.credits.target =
693 le16_to_cpu(data_transfer->credits_requested);
694 sc->recv_io.credits.target =
695 min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max);
696 sc->recv_io.credits.target =
697 max_t(u16, sc->recv_io.credits.target, 1);
698 atomic_add(le16_to_cpu(data_transfer->credits_granted),
699 &sc->send_io.credits.count);
700
701 if (le16_to_cpu(data_transfer->flags) &
702 SMBDIRECT_FLAG_RESPONSE_REQUESTED)
703 queue_work(sc->workqueue, &sc->idle.immediate_work);
704
705 if (atomic_read(&sc->send_io.credits.count) > 0)
706 wake_up(&sc->send_io.credits.wait_queue);
707
708 if (data_length) {
709 if (sc->recv_io.credits.target > old_recv_credit_target)
710 queue_work(sc->workqueue, &sc->recv_io.posted.refill_work);
711
712 enqueue_reassembly(sc, recvmsg, (int)data_length);
713 wake_up(&sc->recv_io.reassembly.wait_queue);
714 } else
715 put_recvmsg(sc, recvmsg);
716
717 return;
718 }
719 case SMBDIRECT_EXPECT_NEGOTIATE_REP:
720 /* client only */
721 break;
722 }
723
724 /*
725 * This is an internal error!
726 */
727 WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER);
728 put_recvmsg(sc, recvmsg);
729 smb_direct_disconnect_rdma_connection(sc);
730 }
731
smb_direct_post_recv(struct smbdirect_socket * sc,struct smbdirect_recv_io * recvmsg)732 static int smb_direct_post_recv(struct smbdirect_socket *sc,
733 struct smbdirect_recv_io *recvmsg)
734 {
735 struct smbdirect_socket_parameters *sp = &sc->parameters;
736 struct ib_recv_wr wr;
737 int ret;
738
739 recvmsg->sge.addr = ib_dma_map_single(sc->ib.dev,
740 recvmsg->packet,
741 sp->max_recv_size,
742 DMA_FROM_DEVICE);
743 ret = ib_dma_mapping_error(sc->ib.dev, recvmsg->sge.addr);
744 if (ret)
745 return ret;
746 recvmsg->sge.length = sp->max_recv_size;
747 recvmsg->sge.lkey = sc->ib.pd->local_dma_lkey;
748 recvmsg->cqe.done = recv_done;
749
750 wr.wr_cqe = &recvmsg->cqe;
751 wr.next = NULL;
752 wr.sg_list = &recvmsg->sge;
753 wr.num_sge = 1;
754
755 ret = ib_post_recv(sc->ib.qp, &wr, NULL);
756 if (ret) {
757 pr_err("Can't post recv: %d\n", ret);
758 ib_dma_unmap_single(sc->ib.dev,
759 recvmsg->sge.addr, recvmsg->sge.length,
760 DMA_FROM_DEVICE);
761 recvmsg->sge.length = 0;
762 smb_direct_disconnect_rdma_connection(sc);
763 return ret;
764 }
765 return ret;
766 }
767
smb_direct_read(struct ksmbd_transport * t,char * buf,unsigned int size,int unused)768 static int smb_direct_read(struct ksmbd_transport *t, char *buf,
769 unsigned int size, int unused)
770 {
771 struct smbdirect_recv_io *recvmsg;
772 struct smbdirect_data_transfer *data_transfer;
773 int to_copy, to_read, data_read, offset;
774 u32 data_length, remaining_data_length, data_offset;
775 int rc;
776 struct smb_direct_transport *st = SMBD_TRANS(t);
777 struct smbdirect_socket *sc = &st->socket;
778
779 again:
780 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
781 pr_err("disconnected\n");
782 return -ENOTCONN;
783 }
784
785 /*
786 * No need to hold the reassembly queue lock all the time as we are
787 * the only one reading from the front of the queue. The transport
788 * may add more entries to the back of the queue at the same time
789 */
790 if (sc->recv_io.reassembly.data_length >= size) {
791 int queue_length;
792 int queue_removed = 0;
793 unsigned long flags;
794
795 /*
796 * Need to make sure reassembly_data_length is read before
797 * reading reassembly_queue_length and calling
798 * get_first_reassembly. This call is lock free
799 * as we never read at the end of the queue which are being
800 * updated in SOFTIRQ as more data is received
801 */
802 virt_rmb();
803 queue_length = sc->recv_io.reassembly.queue_length;
804 data_read = 0;
805 to_read = size;
806 offset = sc->recv_io.reassembly.first_entry_offset;
807 while (data_read < size) {
808 recvmsg = get_first_reassembly(sc);
809 data_transfer = smbdirect_recv_io_payload(recvmsg);
810 data_length = le32_to_cpu(data_transfer->data_length);
811 remaining_data_length =
812 le32_to_cpu(data_transfer->remaining_data_length);
813 data_offset = le32_to_cpu(data_transfer->data_offset);
814
815 /*
816 * The upper layer expects RFC1002 length at the
817 * beginning of the payload. Return it to indicate
818 * the total length of the packet. This minimize the
819 * change to upper layer packet processing logic. This
820 * will be eventually remove when an intermediate
821 * transport layer is added
822 */
823 if (recvmsg->first_segment && size == 4) {
824 unsigned int rfc1002_len =
825 data_length + remaining_data_length;
826 *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
827 data_read = 4;
828 recvmsg->first_segment = false;
829 ksmbd_debug(RDMA,
830 "returning rfc1002 length %d\n",
831 rfc1002_len);
832 goto read_rfc1002_done;
833 }
834
835 to_copy = min_t(int, data_length - offset, to_read);
836 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
837 to_copy);
838
839 /* move on to the next buffer? */
840 if (to_copy == data_length - offset) {
841 queue_length--;
842 /*
843 * No need to lock if we are not at the
844 * end of the queue
845 */
846 if (queue_length) {
847 list_del(&recvmsg->list);
848 } else {
849 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
850 list_del(&recvmsg->list);
851 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
852 }
853 queue_removed++;
854 put_recvmsg(sc, recvmsg);
855 offset = 0;
856 } else {
857 offset += to_copy;
858 }
859
860 to_read -= to_copy;
861 data_read += to_copy;
862 }
863
864 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
865 sc->recv_io.reassembly.data_length -= data_read;
866 sc->recv_io.reassembly.queue_length -= queue_removed;
867 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
868
869 sc->recv_io.reassembly.first_entry_offset = offset;
870 ksmbd_debug(RDMA,
871 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
872 data_read, sc->recv_io.reassembly.data_length,
873 sc->recv_io.reassembly.first_entry_offset);
874 read_rfc1002_done:
875 return data_read;
876 }
877
878 ksmbd_debug(RDMA, "wait_event on more data\n");
879 rc = wait_event_interruptible(sc->recv_io.reassembly.wait_queue,
880 sc->recv_io.reassembly.data_length >= size ||
881 sc->status != SMBDIRECT_SOCKET_CONNECTED);
882 if (rc)
883 return -EINTR;
884
885 goto again;
886 }
887
smb_direct_post_recv_credits(struct work_struct * work)888 static void smb_direct_post_recv_credits(struct work_struct *work)
889 {
890 struct smbdirect_socket *sc =
891 container_of(work, struct smbdirect_socket, recv_io.posted.refill_work);
892 struct smbdirect_recv_io *recvmsg;
893 int credits = 0;
894 int ret;
895
896 if (atomic_read(&sc->recv_io.credits.count) < sc->recv_io.credits.target) {
897 while (true) {
898 recvmsg = get_free_recvmsg(sc);
899 if (!recvmsg)
900 break;
901
902 recvmsg->first_segment = false;
903
904 ret = smb_direct_post_recv(sc, recvmsg);
905 if (ret) {
906 pr_err("Can't post recv: %d\n", ret);
907 put_recvmsg(sc, recvmsg);
908 break;
909 }
910 credits++;
911
912 atomic_inc(&sc->recv_io.posted.count);
913 }
914 }
915
916 if (credits)
917 queue_work(sc->workqueue, &sc->idle.immediate_work);
918 }
919
send_done(struct ib_cq * cq,struct ib_wc * wc)920 static void send_done(struct ib_cq *cq, struct ib_wc *wc)
921 {
922 struct smbdirect_send_io *sendmsg, *sibling, *next;
923 struct smbdirect_socket *sc;
924 int lcredits = 0;
925
926 sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe);
927 sc = sendmsg->socket;
928
929 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
930 ib_wc_status_msg(wc->status), wc->status,
931 wc->opcode);
932
933 /*
934 * Free possible siblings and then the main send_io
935 */
936 list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) {
937 list_del_init(&sibling->sibling_list);
938 smb_direct_free_sendmsg(sc, sibling);
939 lcredits += 1;
940 }
941 /* Note this frees wc->wr_cqe, but not wc */
942 smb_direct_free_sendmsg(sc, sendmsg);
943 lcredits += 1;
944
945 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
946 pr_err("Send error. status='%s (%d)', opcode=%d\n",
947 ib_wc_status_msg(wc->status), wc->status,
948 wc->opcode);
949 smb_direct_disconnect_rdma_connection(sc);
950 return;
951 }
952
953 atomic_add(lcredits, &sc->send_io.lcredits.count);
954 wake_up(&sc->send_io.lcredits.wait_queue);
955
956 if (atomic_dec_and_test(&sc->send_io.pending.count))
957 wake_up(&sc->send_io.pending.zero_wait_queue);
958 }
959
manage_credits_prior_sending(struct smbdirect_socket * sc)960 static int manage_credits_prior_sending(struct smbdirect_socket *sc)
961 {
962 int new_credits;
963
964 if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target)
965 return 0;
966
967 new_credits = atomic_read(&sc->recv_io.posted.count);
968 if (new_credits == 0)
969 return 0;
970
971 new_credits -= atomic_read(&sc->recv_io.credits.count);
972 if (new_credits <= 0)
973 return 0;
974
975 atomic_add(new_credits, &sc->recv_io.credits.count);
976 return new_credits;
977 }
978
manage_keep_alive_before_sending(struct smbdirect_socket * sc)979 static int manage_keep_alive_before_sending(struct smbdirect_socket *sc)
980 {
981 struct smbdirect_socket_parameters *sp = &sc->parameters;
982
983 if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) {
984 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT;
985 /*
986 * Now use the keepalive timeout (instead of keepalive interval)
987 * in order to wait for a response
988 */
989 mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
990 msecs_to_jiffies(sp->keepalive_timeout_msec));
991 return 1;
992 }
993 return 0;
994 }
995
smb_direct_post_send(struct smbdirect_socket * sc,struct ib_send_wr * wr)996 static int smb_direct_post_send(struct smbdirect_socket *sc,
997 struct ib_send_wr *wr)
998 {
999 int ret;
1000
1001 atomic_inc(&sc->send_io.pending.count);
1002 ret = ib_post_send(sc->ib.qp, wr, NULL);
1003 if (ret) {
1004 pr_err("failed to post send: %d\n", ret);
1005 smb_direct_disconnect_rdma_connection(sc);
1006 }
1007 return ret;
1008 }
1009
smb_direct_send_ctx_init(struct smbdirect_send_batch * send_ctx,bool need_invalidate_rkey,unsigned int remote_key)1010 static void smb_direct_send_ctx_init(struct smbdirect_send_batch *send_ctx,
1011 bool need_invalidate_rkey,
1012 unsigned int remote_key)
1013 {
1014 INIT_LIST_HEAD(&send_ctx->msg_list);
1015 send_ctx->wr_cnt = 0;
1016 send_ctx->need_invalidate_rkey = need_invalidate_rkey;
1017 send_ctx->remote_key = remote_key;
1018 }
1019
smb_direct_flush_send_list(struct smbdirect_socket * sc,struct smbdirect_send_batch * send_ctx,bool is_last)1020 static int smb_direct_flush_send_list(struct smbdirect_socket *sc,
1021 struct smbdirect_send_batch *send_ctx,
1022 bool is_last)
1023 {
1024 struct smbdirect_send_io *first, *last;
1025 int ret;
1026
1027 if (list_empty(&send_ctx->msg_list))
1028 return 0;
1029
1030 first = list_first_entry(&send_ctx->msg_list,
1031 struct smbdirect_send_io,
1032 sibling_list);
1033 last = list_last_entry(&send_ctx->msg_list,
1034 struct smbdirect_send_io,
1035 sibling_list);
1036
1037 if (send_ctx->need_invalidate_rkey) {
1038 first->wr.opcode = IB_WR_SEND_WITH_INV;
1039 first->wr.ex.invalidate_rkey = send_ctx->remote_key;
1040 send_ctx->need_invalidate_rkey = false;
1041 send_ctx->remote_key = 0;
1042 }
1043
1044 last->wr.send_flags = IB_SEND_SIGNALED;
1045 last->wr.wr_cqe = &last->cqe;
1046
1047 /*
1048 * Remove last from send_ctx->msg_list
1049 * and splice the rest of send_ctx->msg_list
1050 * to last->sibling_list.
1051 *
1052 * send_ctx->msg_list is a valid empty list
1053 * at the end.
1054 */
1055 list_del_init(&last->sibling_list);
1056 list_splice_tail_init(&send_ctx->msg_list, &last->sibling_list);
1057 send_ctx->wr_cnt = 0;
1058
1059 ret = smb_direct_post_send(sc, &first->wr);
1060 if (ret) {
1061 struct smbdirect_send_io *sibling, *next;
1062
1063 list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) {
1064 list_del_init(&sibling->sibling_list);
1065 smb_direct_free_sendmsg(sc, sibling);
1066 }
1067 smb_direct_free_sendmsg(sc, last);
1068 }
1069
1070 return ret;
1071 }
1072
wait_for_credits(struct smbdirect_socket * sc,wait_queue_head_t * waitq,atomic_t * total_credits,int needed)1073 static int wait_for_credits(struct smbdirect_socket *sc,
1074 wait_queue_head_t *waitq, atomic_t *total_credits,
1075 int needed)
1076 {
1077 int ret;
1078
1079 do {
1080 if (atomic_sub_return(needed, total_credits) >= 0)
1081 return 0;
1082
1083 atomic_add(needed, total_credits);
1084 ret = wait_event_interruptible(*waitq,
1085 atomic_read(total_credits) >= needed ||
1086 sc->status != SMBDIRECT_SOCKET_CONNECTED);
1087
1088 if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
1089 return -ENOTCONN;
1090 else if (ret < 0)
1091 return ret;
1092 } while (true);
1093 }
1094
wait_for_send_lcredit(struct smbdirect_socket * sc,struct smbdirect_send_batch * send_ctx)1095 static int wait_for_send_lcredit(struct smbdirect_socket *sc,
1096 struct smbdirect_send_batch *send_ctx)
1097 {
1098 if (send_ctx && (atomic_read(&sc->send_io.lcredits.count) <= 1)) {
1099 int ret;
1100
1101 ret = smb_direct_flush_send_list(sc, send_ctx, false);
1102 if (ret)
1103 return ret;
1104 }
1105
1106 return wait_for_credits(sc,
1107 &sc->send_io.lcredits.wait_queue,
1108 &sc->send_io.lcredits.count,
1109 1);
1110 }
1111
wait_for_send_credits(struct smbdirect_socket * sc,struct smbdirect_send_batch * send_ctx)1112 static int wait_for_send_credits(struct smbdirect_socket *sc,
1113 struct smbdirect_send_batch *send_ctx)
1114 {
1115 int ret;
1116
1117 if (send_ctx &&
1118 (send_ctx->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) {
1119 ret = smb_direct_flush_send_list(sc, send_ctx, false);
1120 if (ret)
1121 return ret;
1122 }
1123
1124 return wait_for_credits(sc, &sc->send_io.credits.wait_queue, &sc->send_io.credits.count, 1);
1125 }
1126
wait_for_rw_credits(struct smbdirect_socket * sc,int credits)1127 static int wait_for_rw_credits(struct smbdirect_socket *sc, int credits)
1128 {
1129 return wait_for_credits(sc,
1130 &sc->rw_io.credits.wait_queue,
1131 &sc->rw_io.credits.count,
1132 credits);
1133 }
1134
calc_rw_credits(struct smbdirect_socket * sc,char * buf,unsigned int len)1135 static int calc_rw_credits(struct smbdirect_socket *sc,
1136 char *buf, unsigned int len)
1137 {
1138 return DIV_ROUND_UP(get_buf_page_count(buf, len),
1139 sc->rw_io.credits.num_pages);
1140 }
1141
smb_direct_create_header(struct smbdirect_socket * sc,int size,int remaining_data_length,struct smbdirect_send_io ** sendmsg_out)1142 static int smb_direct_create_header(struct smbdirect_socket *sc,
1143 int size, int remaining_data_length,
1144 struct smbdirect_send_io **sendmsg_out)
1145 {
1146 struct smbdirect_socket_parameters *sp = &sc->parameters;
1147 struct smbdirect_send_io *sendmsg;
1148 struct smbdirect_data_transfer *packet;
1149 int header_length;
1150 int ret;
1151
1152 sendmsg = smb_direct_alloc_sendmsg(sc);
1153 if (IS_ERR(sendmsg))
1154 return PTR_ERR(sendmsg);
1155
1156 /* Fill in the packet header */
1157 packet = (struct smbdirect_data_transfer *)sendmsg->packet;
1158 packet->credits_requested = cpu_to_le16(sp->send_credit_target);
1159 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc));
1160
1161 packet->flags = 0;
1162 if (manage_keep_alive_before_sending(sc))
1163 packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED);
1164
1165 packet->reserved = 0;
1166 if (!size)
1167 packet->data_offset = 0;
1168 else
1169 packet->data_offset = cpu_to_le32(24);
1170 packet->data_length = cpu_to_le32(size);
1171 packet->remaining_data_length = cpu_to_le32(remaining_data_length);
1172 packet->padding = 0;
1173
1174 ksmbd_debug(RDMA,
1175 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
1176 le16_to_cpu(packet->credits_requested),
1177 le16_to_cpu(packet->credits_granted),
1178 le32_to_cpu(packet->data_offset),
1179 le32_to_cpu(packet->data_length),
1180 le32_to_cpu(packet->remaining_data_length));
1181
1182 /* Map the packet to DMA */
1183 header_length = sizeof(struct smbdirect_data_transfer);
1184 /* If this is a packet without payload, don't send padding */
1185 if (!size)
1186 header_length =
1187 offsetof(struct smbdirect_data_transfer, padding);
1188
1189 sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev,
1190 (void *)packet,
1191 header_length,
1192 DMA_TO_DEVICE);
1193 ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr);
1194 if (ret) {
1195 smb_direct_free_sendmsg(sc, sendmsg);
1196 return ret;
1197 }
1198
1199 sendmsg->num_sge = 1;
1200 sendmsg->sge[0].length = header_length;
1201 sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey;
1202
1203 *sendmsg_out = sendmsg;
1204 return 0;
1205 }
1206
get_sg_list(void * buf,int size,struct scatterlist * sg_list,int nentries)1207 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
1208 {
1209 bool high = is_vmalloc_addr(buf);
1210 struct page *page;
1211 int offset, len;
1212 int i = 0;
1213
1214 if (size <= 0 || nentries < get_buf_page_count(buf, size))
1215 return -EINVAL;
1216
1217 offset = offset_in_page(buf);
1218 buf -= offset;
1219 while (size > 0) {
1220 len = min_t(int, PAGE_SIZE - offset, size);
1221 if (high)
1222 page = vmalloc_to_page(buf);
1223 else
1224 page = kmap_to_page(buf);
1225
1226 if (!sg_list)
1227 return -EINVAL;
1228 sg_set_page(sg_list, page, len, offset);
1229 sg_list = sg_next(sg_list);
1230
1231 buf += PAGE_SIZE;
1232 size -= len;
1233 offset = 0;
1234 i++;
1235 }
1236 return i;
1237 }
1238
get_mapped_sg_list(struct ib_device * device,void * buf,int size,struct scatterlist * sg_list,int nentries,enum dma_data_direction dir)1239 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
1240 struct scatterlist *sg_list, int nentries,
1241 enum dma_data_direction dir)
1242 {
1243 int npages;
1244
1245 npages = get_sg_list(buf, size, sg_list, nentries);
1246 if (npages < 0)
1247 return -EINVAL;
1248 return ib_dma_map_sg(device, sg_list, npages, dir);
1249 }
1250
post_sendmsg(struct smbdirect_socket * sc,struct smbdirect_send_batch * send_ctx,struct smbdirect_send_io * msg)1251 static int post_sendmsg(struct smbdirect_socket *sc,
1252 struct smbdirect_send_batch *send_ctx,
1253 struct smbdirect_send_io *msg)
1254 {
1255 int i;
1256
1257 for (i = 0; i < msg->num_sge; i++)
1258 ib_dma_sync_single_for_device(sc->ib.dev,
1259 msg->sge[i].addr, msg->sge[i].length,
1260 DMA_TO_DEVICE);
1261
1262 msg->cqe.done = send_done;
1263 msg->wr.opcode = IB_WR_SEND;
1264 msg->wr.sg_list = &msg->sge[0];
1265 msg->wr.num_sge = msg->num_sge;
1266 msg->wr.next = NULL;
1267
1268 if (send_ctx) {
1269 msg->wr.wr_cqe = NULL;
1270 msg->wr.send_flags = 0;
1271 if (!list_empty(&send_ctx->msg_list)) {
1272 struct smbdirect_send_io *last;
1273
1274 last = list_last_entry(&send_ctx->msg_list,
1275 struct smbdirect_send_io,
1276 sibling_list);
1277 last->wr.next = &msg->wr;
1278 }
1279 list_add_tail(&msg->sibling_list, &send_ctx->msg_list);
1280 send_ctx->wr_cnt++;
1281 return 0;
1282 }
1283
1284 msg->wr.wr_cqe = &msg->cqe;
1285 msg->wr.send_flags = IB_SEND_SIGNALED;
1286 return smb_direct_post_send(sc, &msg->wr);
1287 }
1288
smb_direct_post_send_data(struct smbdirect_socket * sc,struct smbdirect_send_batch * send_ctx,struct kvec * iov,int niov,int remaining_data_length)1289 static int smb_direct_post_send_data(struct smbdirect_socket *sc,
1290 struct smbdirect_send_batch *send_ctx,
1291 struct kvec *iov, int niov,
1292 int remaining_data_length)
1293 {
1294 int i, j, ret;
1295 struct smbdirect_send_io *msg;
1296 int data_length;
1297 struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1];
1298
1299 ret = wait_for_send_lcredit(sc, send_ctx);
1300 if (ret)
1301 goto lcredit_failed;
1302
1303 ret = wait_for_send_credits(sc, send_ctx);
1304 if (ret)
1305 goto credit_failed;
1306
1307 data_length = 0;
1308 for (i = 0; i < niov; i++)
1309 data_length += iov[i].iov_len;
1310
1311 ret = smb_direct_create_header(sc, data_length, remaining_data_length,
1312 &msg);
1313 if (ret)
1314 goto header_failed;
1315
1316 for (i = 0; i < niov; i++) {
1317 struct ib_sge *sge;
1318 int sg_cnt;
1319
1320 sg_init_table(sg, SMBDIRECT_SEND_IO_MAX_SGE - 1);
1321 sg_cnt = get_mapped_sg_list(sc->ib.dev,
1322 iov[i].iov_base, iov[i].iov_len,
1323 sg, SMBDIRECT_SEND_IO_MAX_SGE - 1,
1324 DMA_TO_DEVICE);
1325 if (sg_cnt <= 0) {
1326 pr_err("failed to map buffer\n");
1327 ret = -ENOMEM;
1328 goto err;
1329 } else if (sg_cnt + msg->num_sge > SMBDIRECT_SEND_IO_MAX_SGE) {
1330 pr_err("buffer not fitted into sges\n");
1331 ret = -E2BIG;
1332 ib_dma_unmap_sg(sc->ib.dev, sg, sg_cnt,
1333 DMA_TO_DEVICE);
1334 goto err;
1335 }
1336
1337 for (j = 0; j < sg_cnt; j++) {
1338 sge = &msg->sge[msg->num_sge];
1339 sge->addr = sg_dma_address(&sg[j]);
1340 sge->length = sg_dma_len(&sg[j]);
1341 sge->lkey = sc->ib.pd->local_dma_lkey;
1342 msg->num_sge++;
1343 }
1344 }
1345
1346 ret = post_sendmsg(sc, send_ctx, msg);
1347 if (ret)
1348 goto err;
1349 return 0;
1350 err:
1351 smb_direct_free_sendmsg(sc, msg);
1352 header_failed:
1353 atomic_inc(&sc->send_io.credits.count);
1354 credit_failed:
1355 atomic_inc(&sc->send_io.lcredits.count);
1356 lcredit_failed:
1357 return ret;
1358 }
1359
smb_direct_writev(struct ksmbd_transport * t,struct kvec * iov,int niovs,int buflen,bool need_invalidate,unsigned int remote_key)1360 static int smb_direct_writev(struct ksmbd_transport *t,
1361 struct kvec *iov, int niovs, int buflen,
1362 bool need_invalidate, unsigned int remote_key)
1363 {
1364 struct smb_direct_transport *st = SMBD_TRANS(t);
1365 struct smbdirect_socket *sc = &st->socket;
1366 struct smbdirect_socket_parameters *sp = &sc->parameters;
1367 size_t remaining_data_length;
1368 size_t iov_idx;
1369 size_t iov_ofs;
1370 size_t max_iov_size = sp->max_send_size -
1371 sizeof(struct smbdirect_data_transfer);
1372 int ret;
1373 struct smbdirect_send_batch send_ctx;
1374 int error = 0;
1375
1376 if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
1377 return -ENOTCONN;
1378
1379 //FIXME: skip RFC1002 header..
1380 if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4))
1381 return -EINVAL;
1382 buflen -= 4;
1383 iov_idx = 1;
1384 iov_ofs = 0;
1385
1386 remaining_data_length = buflen;
1387 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
1388
1389 smb_direct_send_ctx_init(&send_ctx, need_invalidate, remote_key);
1390 while (remaining_data_length) {
1391 struct kvec vecs[SMBDIRECT_SEND_IO_MAX_SGE - 1]; /* minus smbdirect hdr */
1392 size_t possible_bytes = max_iov_size;
1393 size_t possible_vecs;
1394 size_t bytes = 0;
1395 size_t nvecs = 0;
1396
1397 /*
1398 * For the last message remaining_data_length should be
1399 * have been 0 already!
1400 */
1401 if (WARN_ON_ONCE(iov_idx >= niovs)) {
1402 error = -EINVAL;
1403 goto done;
1404 }
1405
1406 /*
1407 * We have 2 factors which limit the arguments we pass
1408 * to smb_direct_post_send_data():
1409 *
1410 * 1. The number of supported sges for the send,
1411 * while one is reserved for the smbdirect header.
1412 * And we currently need one SGE per page.
1413 * 2. The number of negotiated payload bytes per send.
1414 */
1415 possible_vecs = min_t(size_t, ARRAY_SIZE(vecs), niovs - iov_idx);
1416
1417 while (iov_idx < niovs && possible_vecs && possible_bytes) {
1418 struct kvec *v = &vecs[nvecs];
1419 int page_count;
1420
1421 v->iov_base = ((u8 *)iov[iov_idx].iov_base) + iov_ofs;
1422 v->iov_len = min_t(size_t,
1423 iov[iov_idx].iov_len - iov_ofs,
1424 possible_bytes);
1425 page_count = get_buf_page_count(v->iov_base, v->iov_len);
1426 if (page_count > possible_vecs) {
1427 /*
1428 * If the number of pages in the buffer
1429 * is to much (because we currently require
1430 * one SGE per page), we need to limit the
1431 * length.
1432 *
1433 * We know possible_vecs is at least 1,
1434 * so we always keep the first page.
1435 *
1436 * We need to calculate the number extra
1437 * pages (epages) we can also keep.
1438 *
1439 * We calculate the number of bytes in the
1440 * first page (fplen), this should never be
1441 * larger than v->iov_len because page_count is
1442 * at least 2, but adding a limitation feels
1443 * better.
1444 *
1445 * Then we calculate the number of bytes (elen)
1446 * we can keep for the extra pages.
1447 */
1448 size_t epages = possible_vecs - 1;
1449 size_t fpofs = offset_in_page(v->iov_base);
1450 size_t fplen = min_t(size_t, PAGE_SIZE - fpofs, v->iov_len);
1451 size_t elen = min_t(size_t, v->iov_len - fplen, epages*PAGE_SIZE);
1452
1453 v->iov_len = fplen + elen;
1454 page_count = get_buf_page_count(v->iov_base, v->iov_len);
1455 if (WARN_ON_ONCE(page_count > possible_vecs)) {
1456 /*
1457 * Something went wrong in the above
1458 * logic...
1459 */
1460 error = -EINVAL;
1461 goto done;
1462 }
1463 }
1464 possible_vecs -= page_count;
1465 nvecs += 1;
1466 possible_bytes -= v->iov_len;
1467 bytes += v->iov_len;
1468
1469 iov_ofs += v->iov_len;
1470 if (iov_ofs >= iov[iov_idx].iov_len) {
1471 iov_idx += 1;
1472 iov_ofs = 0;
1473 }
1474 }
1475
1476 remaining_data_length -= bytes;
1477
1478 ret = smb_direct_post_send_data(sc, &send_ctx,
1479 vecs, nvecs,
1480 remaining_data_length);
1481 if (unlikely(ret)) {
1482 error = ret;
1483 goto done;
1484 }
1485 }
1486
1487 done:
1488 ret = smb_direct_flush_send_list(sc, &send_ctx, true);
1489 if (unlikely(!ret && error))
1490 ret = error;
1491
1492 /*
1493 * As an optimization, we don't wait for individual I/O to finish
1494 * before sending the next one.
1495 * Send them all and wait for pending send count to get to 0
1496 * that means all the I/Os have been out and we are good to return
1497 */
1498
1499 wait_event(sc->send_io.pending.zero_wait_queue,
1500 atomic_read(&sc->send_io.pending.count) == 0 ||
1501 sc->status != SMBDIRECT_SOCKET_CONNECTED);
1502 if (sc->status != SMBDIRECT_SOCKET_CONNECTED && ret == 0)
1503 ret = -ENOTCONN;
1504
1505 return ret;
1506 }
1507
smb_direct_free_rdma_rw_msg(struct smb_direct_transport * t,struct smbdirect_rw_io * msg,enum dma_data_direction dir)1508 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
1509 struct smbdirect_rw_io *msg,
1510 enum dma_data_direction dir)
1511 {
1512 struct smbdirect_socket *sc = &t->socket;
1513
1514 rdma_rw_ctx_destroy(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
1515 msg->sgt.sgl, msg->sgt.nents, dir);
1516 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1517 kfree(msg);
1518 }
1519
read_write_done(struct ib_cq * cq,struct ib_wc * wc,enum dma_data_direction dir)1520 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
1521 enum dma_data_direction dir)
1522 {
1523 struct smbdirect_rw_io *msg =
1524 container_of(wc->wr_cqe, struct smbdirect_rw_io, cqe);
1525 struct smbdirect_socket *sc = msg->socket;
1526
1527 if (wc->status != IB_WC_SUCCESS) {
1528 msg->error = -EIO;
1529 pr_err("read/write error. opcode = %d, status = %s(%d)\n",
1530 wc->opcode, ib_wc_status_msg(wc->status), wc->status);
1531 if (wc->status != IB_WC_WR_FLUSH_ERR)
1532 smb_direct_disconnect_rdma_connection(sc);
1533 }
1534
1535 complete(msg->completion);
1536 }
1537
read_done(struct ib_cq * cq,struct ib_wc * wc)1538 static void read_done(struct ib_cq *cq, struct ib_wc *wc)
1539 {
1540 read_write_done(cq, wc, DMA_FROM_DEVICE);
1541 }
1542
write_done(struct ib_cq * cq,struct ib_wc * wc)1543 static void write_done(struct ib_cq *cq, struct ib_wc *wc)
1544 {
1545 read_write_done(cq, wc, DMA_TO_DEVICE);
1546 }
1547
smb_direct_rdma_xmit(struct smb_direct_transport * t,void * buf,int buf_len,struct smbdirect_buffer_descriptor_v1 * desc,unsigned int desc_len,bool is_read)1548 static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
1549 void *buf, int buf_len,
1550 struct smbdirect_buffer_descriptor_v1 *desc,
1551 unsigned int desc_len,
1552 bool is_read)
1553 {
1554 struct smbdirect_socket *sc = &t->socket;
1555 struct smbdirect_socket_parameters *sp = &sc->parameters;
1556 struct smbdirect_rw_io *msg, *next_msg;
1557 int i, ret;
1558 DECLARE_COMPLETION_ONSTACK(completion);
1559 struct ib_send_wr *first_wr;
1560 LIST_HEAD(msg_list);
1561 char *desc_buf;
1562 int credits_needed;
1563 unsigned int desc_buf_len, desc_num = 0;
1564
1565 if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
1566 return -ENOTCONN;
1567
1568 if (buf_len > sp->max_read_write_size)
1569 return -EINVAL;
1570
1571 /* calculate needed credits */
1572 credits_needed = 0;
1573 desc_buf = buf;
1574 for (i = 0; i < desc_len / sizeof(*desc); i++) {
1575 if (!buf_len)
1576 break;
1577
1578 desc_buf_len = le32_to_cpu(desc[i].length);
1579 if (!desc_buf_len)
1580 return -EINVAL;
1581
1582 if (desc_buf_len > buf_len) {
1583 desc_buf_len = buf_len;
1584 desc[i].length = cpu_to_le32(desc_buf_len);
1585 buf_len = 0;
1586 }
1587
1588 credits_needed += calc_rw_credits(sc, desc_buf, desc_buf_len);
1589 desc_buf += desc_buf_len;
1590 buf_len -= desc_buf_len;
1591 desc_num++;
1592 }
1593
1594 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
1595 str_read_write(is_read), buf_len, credits_needed);
1596
1597 ret = wait_for_rw_credits(sc, credits_needed);
1598 if (ret < 0)
1599 return ret;
1600
1601 /* build rdma_rw_ctx for each descriptor */
1602 desc_buf = buf;
1603 for (i = 0; i < desc_num; i++) {
1604 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE),
1605 KSMBD_DEFAULT_GFP);
1606 if (!msg) {
1607 ret = -ENOMEM;
1608 goto out;
1609 }
1610
1611 desc_buf_len = le32_to_cpu(desc[i].length);
1612
1613 msg->socket = sc;
1614 msg->cqe.done = is_read ? read_done : write_done;
1615 msg->completion = &completion;
1616
1617 msg->sgt.sgl = &msg->sg_list[0];
1618 ret = sg_alloc_table_chained(&msg->sgt,
1619 get_buf_page_count(desc_buf, desc_buf_len),
1620 msg->sg_list, SG_CHUNK_SIZE);
1621 if (ret) {
1622 ret = -ENOMEM;
1623 goto free_msg;
1624 }
1625
1626 ret = get_sg_list(desc_buf, desc_buf_len,
1627 msg->sgt.sgl, msg->sgt.orig_nents);
1628 if (ret < 0)
1629 goto free_table;
1630
1631 ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
1632 msg->sgt.sgl,
1633 get_buf_page_count(desc_buf, desc_buf_len),
1634 0,
1635 le64_to_cpu(desc[i].offset),
1636 le32_to_cpu(desc[i].token),
1637 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1638 if (ret < 0) {
1639 pr_err("failed to init rdma_rw_ctx: %d\n", ret);
1640 goto free_table;
1641 }
1642
1643 list_add_tail(&msg->list, &msg_list);
1644 desc_buf += desc_buf_len;
1645 }
1646
1647 /* concatenate work requests of rdma_rw_ctxs */
1648 first_wr = NULL;
1649 list_for_each_entry_reverse(msg, &msg_list, list) {
1650 first_wr = rdma_rw_ctx_wrs(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
1651 &msg->cqe, first_wr);
1652 }
1653
1654 ret = ib_post_send(sc->ib.qp, first_wr, NULL);
1655 if (ret) {
1656 pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
1657 goto out;
1658 }
1659
1660 msg = list_last_entry(&msg_list, struct smbdirect_rw_io, list);
1661 wait_for_completion(&completion);
1662 ret = msg->error;
1663 out:
1664 list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
1665 list_del(&msg->list);
1666 smb_direct_free_rdma_rw_msg(t, msg,
1667 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1668 }
1669 atomic_add(credits_needed, &sc->rw_io.credits.count);
1670 wake_up(&sc->rw_io.credits.wait_queue);
1671 return ret;
1672
1673 free_table:
1674 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1675 free_msg:
1676 kfree(msg);
1677 goto out;
1678 }
1679
smb_direct_rdma_write(struct ksmbd_transport * t,void * buf,unsigned int buflen,struct smbdirect_buffer_descriptor_v1 * desc,unsigned int desc_len)1680 static int smb_direct_rdma_write(struct ksmbd_transport *t,
1681 void *buf, unsigned int buflen,
1682 struct smbdirect_buffer_descriptor_v1 *desc,
1683 unsigned int desc_len)
1684 {
1685 return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen,
1686 desc, desc_len, false);
1687 }
1688
smb_direct_rdma_read(struct ksmbd_transport * t,void * buf,unsigned int buflen,struct smbdirect_buffer_descriptor_v1 * desc,unsigned int desc_len)1689 static int smb_direct_rdma_read(struct ksmbd_transport *t,
1690 void *buf, unsigned int buflen,
1691 struct smbdirect_buffer_descriptor_v1 *desc,
1692 unsigned int desc_len)
1693 {
1694 return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen,
1695 desc, desc_len, true);
1696 }
1697
smb_direct_disconnect(struct ksmbd_transport * t)1698 static void smb_direct_disconnect(struct ksmbd_transport *t)
1699 {
1700 struct smb_direct_transport *st = SMBD_TRANS(t);
1701 struct smbdirect_socket *sc = &st->socket;
1702
1703 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", sc->rdma.cm_id);
1704
1705 free_transport(st);
1706 }
1707
smb_direct_shutdown(struct ksmbd_transport * t)1708 static void smb_direct_shutdown(struct ksmbd_transport *t)
1709 {
1710 struct smb_direct_transport *st = SMBD_TRANS(t);
1711 struct smbdirect_socket *sc = &st->socket;
1712
1713 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", sc->rdma.cm_id);
1714
1715 smb_direct_disconnect_rdma_work(&sc->disconnect_work);
1716 }
1717
smb_direct_cm_handler(struct rdma_cm_id * cm_id,struct rdma_cm_event * event)1718 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
1719 struct rdma_cm_event *event)
1720 {
1721 struct smbdirect_socket *sc = cm_id->context;
1722
1723 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
1724 cm_id, rdma_event_msg(event->event), event->event);
1725
1726 switch (event->event) {
1727 case RDMA_CM_EVENT_ESTABLISHED: {
1728 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING);
1729 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED;
1730 wake_up(&sc->status_wait);
1731 break;
1732 }
1733 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1734 case RDMA_CM_EVENT_DISCONNECTED: {
1735 sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
1736 smb_direct_disconnect_rdma_work(&sc->disconnect_work);
1737 if (sc->ib.qp)
1738 ib_drain_qp(sc->ib.qp);
1739 break;
1740 }
1741 case RDMA_CM_EVENT_CONNECT_ERROR: {
1742 sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
1743 smb_direct_disconnect_rdma_work(&sc->disconnect_work);
1744 break;
1745 }
1746 default:
1747 pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
1748 cm_id, rdma_event_msg(event->event),
1749 event->event);
1750 break;
1751 }
1752 return 0;
1753 }
1754
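/*
 * QP event handler: fatal QP/CQ errors tear down the RDMA connection,
 * all other events are only logged at debug level.
 */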
1755 static void smb_direct_qpair_handler(struct ib_event *event, void *context)
1756 {
1757 struct smbdirect_socket *sc = context;
1758
1759 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
1760 sc->rdma.cm_id, ib_event_msg(event->event), event->event);
1761
1762 switch (event->event) {
1763 case IB_EVENT_CQ_ERR:
1764 case IB_EVENT_QP_FATAL:
1765 smb_direct_disconnect_rdma_connection(sc);
1766 break;
1767 default:
1768 break;
1769 }
1770 }
1771
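/*
 * Build and post the SMBDirect negotiate response. On failure a minimal
 * response carrying only the version range and STATUS_NOT_SUPPORTED is
 * sent; on success the negotiated sizes and credits are advertised and
 * the socket moves to SMBDIRECT_SOCKET_CONNECTED. Waits until the send
 * completes or the connection drops.
 */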
1772 static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc,
1773 int failed)
1774 {
1775 struct smbdirect_socket_parameters *sp = &sc->parameters;
1776 struct smbdirect_send_io *sendmsg;
1777 struct smbdirect_negotiate_resp *resp;
1778 int ret;
1779
1780 sendmsg = smb_direct_alloc_sendmsg(sc);
1781 if (IS_ERR(sendmsg))
1782 return -ENOMEM;
1783
1784 resp = (struct smbdirect_negotiate_resp *)sendmsg->packet;
1785 if (failed) {
1786 memset(resp, 0, sizeof(*resp));
1787 resp->min_version = SMB_DIRECT_VERSION_LE;
1788 resp->max_version = SMB_DIRECT_VERSION_LE;
1789 resp->status = STATUS_NOT_SUPPORTED;
1790
1791 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED;
1792 } else {
1793 resp->status = STATUS_SUCCESS;
1794 resp->min_version = SMB_DIRECT_VERSION_LE;
1795 resp->max_version = SMB_DIRECT_VERSION_LE;
1796 resp->negotiated_version = SMB_DIRECT_VERSION_LE;
1797 resp->reserved = 0;
1798 resp->credits_requested =
1799 cpu_to_le16(sp->send_credit_target);
1800 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc));
1801 resp->max_readwrite_size = cpu_to_le32(sp->max_read_write_size);
1802 resp->preferred_send_size = cpu_to_le32(sp->max_send_size);
1803 resp->max_receive_size = cpu_to_le32(sp->max_recv_size);
1804 resp->max_fragmented_size =
1805 cpu_to_le32(sp->max_fragmented_recv_size);
1806
1807 sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER;
1808 sc->status = SMBDIRECT_SOCKET_CONNECTED;
1809 }
1810
1811 sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev,
1812 (void *)resp, sizeof(*resp),
1813 DMA_TO_DEVICE);
1814 ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr);
1815 if (ret) {
1816 smb_direct_free_sendmsg(sc, sendmsg);
1817 return ret;
1818 }
1819
1820 sendmsg->num_sge = 1;
1821 sendmsg->sge[0].length = sizeof(*resp);
1822 sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey;
1823
1824 ret = post_sendmsg(sc, NULL, sendmsg);
1825 if (ret) {
1826 smb_direct_free_sendmsg(sc, sendmsg);
1827 return ret;
1828 }
1829
1830 wait_event(sc->send_io.pending.zero_wait_queue,
1831 atomic_read(&sc->send_io.pending.count) == 0 ||
1832 sc->status != SMBDIRECT_SOCKET_CONNECTED);
1833 if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
1834 return -ENOTCONN;
1835
1836 return 0;
1837 }
1838
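/*
 * Accept the pending RDMA connection using the already negotiated
 * initiator_depth/responder_resources. Legacy iWARP peers additionally
 * receive the IRD/ORD values as an 8-byte private data blob. The idle
 * timer is armed with the negotiate timeout so that a stalled
 * negotiation leads to a disconnect.
 */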
1839 static int smb_direct_accept_client(struct smbdirect_socket *sc)
1840 {
1841 struct smbdirect_socket_parameters *sp = &sc->parameters;
1842 struct rdma_conn_param conn_param;
1843 __be32 ird_ord_hdr[2];
1844 int ret;
1845
1846 /*
1847 * smb_direct_handle_connect_request()
1848 * already negotiated sp->initiator_depth
1849 * and sp->responder_resources
1850 */
1851 memset(&conn_param, 0, sizeof(conn_param));
1852 conn_param.initiator_depth = sp->initiator_depth;
1853 conn_param.responder_resources = sp->responder_resources;
1854
1855 if (sc->rdma.legacy_iwarp) {
1856 ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources);
1857 ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth);
1858 conn_param.private_data = ird_ord_hdr;
1859 conn_param.private_data_len = sizeof(ird_ord_hdr);
1860 } else {
1861 conn_param.private_data = NULL;
1862 conn_param.private_data_len = 0;
1863 }
1864 conn_param.retry_count = SMB_DIRECT_CM_RETRY;
1865 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
1866 conn_param.flow_control = 0;
1867
1868 /*
1869 * Start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING
1870 * so that the timer will cause a disconnect if negotiation stalls.
1871 */
1872 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING;
1873 mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
1874 msecs_to_jiffies(sp->negotiate_timeout_msec));
1875
1876 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED);
1877 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING;
1878 ret = rdma_accept(sc->rdma.cm_id, &conn_param);
1879 if (ret) {
1880 pr_err("error at rdma_accept: %d\n", ret);
1881 return ret;
1882 }
1883 return 0;
1884 }
1885
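/*
 * Post a single receive for the expected negotiate request, then accept
 * the RDMA connection from the client.
 */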
1886 static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc)
1887 {
1888 struct smbdirect_recv_io *recvmsg;
1889 bool recv_posted = false;
1890 int ret;
1891
1892 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED);
1893 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED;
1894
1895 sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REQ;
1896
1897 recvmsg = get_free_recvmsg(sc);
1898 if (!recvmsg)
1899 return -ENOMEM;
1900
1901 ret = smb_direct_post_recv(sc, recvmsg);
1902 if (ret) {
1903 pr_err("Can't post recv: %d\n", ret);
1904 goto out_err;
1905 }
1906 recv_posted = true;
1907
1908 ret = smb_direct_accept_client(sc);
1909 if (ret) {
1910 pr_err("Can't accept client\n");
1911 goto out_err;
1912 }
1913
1914 return 0;
1915 out_err:
1916 /*
1917 * If the recv was never posted, return it to the free list.
1918 * If it was posted, leave it alone so disconnect teardown can
1919 * drain the QP and complete it (flush) and the completion path
1920 * will unmap it exactly once.
1921 */
1922 if (!recv_posted)
1923 put_recvmsg(sc, recvmsg);
1924 return ret;
1925 }
1926
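/*
 * Derive the per-connection limits from the configured parameters:
 * check that max_send_size fits into SMBDIRECT_SEND_IO_MAX_SGE SGEs and
 * size the RDMA R/W credit pool via rdma_rw_mr_factor().
 */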
1927 static int smb_direct_init_params(struct smbdirect_socket *sc)
1928 {
1929 struct smbdirect_socket_parameters *sp = &sc->parameters;
1930 int max_send_sges;
1931 unsigned int maxpages;
1932
1933 /* need 3 more SGEs, because an SMB_DIRECT header, an SMB2 header,
1934 * and an SMB2 response could each be mapped.
1935 */
1936 max_send_sges = DIV_ROUND_UP(sp->max_send_size, PAGE_SIZE) + 3;
1937 if (max_send_sges > SMBDIRECT_SEND_IO_MAX_SGE) {
1938 pr_err("max_send_size %d is too large\n", sp->max_send_size);
1939 return -EINVAL;
1940 }
1941
1942 atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target);
1943
1944 maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE);
1945 sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev,
1946 sc->rdma.cm_id->port_num,
1947 maxpages);
1948 sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max);
1949 /* add one extra in order to handle unaligned pages */
1950 sc->rw_io.credits.max += 1;
1951
1952 sc->recv_io.credits.target = 1;
1953
1954 atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max);
1955
1956 return 0;
1957 }
1958
1959 static void smb_direct_destroy_pools(struct smbdirect_socket *sc)
1960 {
1961 struct smbdirect_recv_io *recvmsg;
1962
1963 while ((recvmsg = get_free_recvmsg(sc)))
1964 mempool_free(recvmsg, sc->recv_io.mem.pool);
1965
1966 mempool_destroy(sc->recv_io.mem.pool);
1967 sc->recv_io.mem.pool = NULL;
1968
1969 kmem_cache_destroy(sc->recv_io.mem.cache);
1970 sc->recv_io.mem.cache = NULL;
1971
1972 mempool_destroy(sc->send_io.mem.pool);
1973 sc->send_io.mem.pool = NULL;
1974
1975 kmem_cache_destroy(sc->send_io.mem.cache);
1976 sc->send_io.mem.cache = NULL;
1977 }
1978
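/*
 * Create the per-socket slab caches and mempools for send and receive
 * messages and pre-populate the receive free list with recv_credit_max
 * buffers sized for max_recv_size.
 */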
1979 static int smb_direct_create_pools(struct smbdirect_socket *sc)
1980 {
1981 struct smbdirect_socket_parameters *sp = &sc->parameters;
1982 char name[80];
1983 int i;
1984 struct smbdirect_recv_io *recvmsg;
1985
1986 snprintf(name, sizeof(name), "smbdirect_send_io_pool_%p", sc);
1987 sc->send_io.mem.cache = kmem_cache_create(name,
1988 sizeof(struct smbdirect_send_io) +
1989 sizeof(struct smbdirect_negotiate_resp),
1990 0, SLAB_HWCACHE_ALIGN, NULL);
1991 if (!sc->send_io.mem.cache)
1992 return -ENOMEM;
1993
1994 sc->send_io.mem.pool = mempool_create(sp->send_credit_target,
1995 mempool_alloc_slab, mempool_free_slab,
1996 sc->send_io.mem.cache);
1997 if (!sc->send_io.mem.pool)
1998 goto err;
1999
2000 snprintf(name, sizeof(name), "smbdirect_recv_io_pool_%p", sc);
2001 sc->recv_io.mem.cache = kmem_cache_create(name,
2002 sizeof(struct smbdirect_recv_io) +
2003 sp->max_recv_size,
2004 0, SLAB_HWCACHE_ALIGN, NULL);
2005 if (!sc->recv_io.mem.cache)
2006 goto err;
2007
2008 sc->recv_io.mem.pool =
2009 mempool_create(sp->recv_credit_max, mempool_alloc_slab,
2010 mempool_free_slab, sc->recv_io.mem.cache);
2011 if (!sc->recv_io.mem.pool)
2012 goto err;
2013
2014 for (i = 0; i < sp->recv_credit_max; i++) {
2015 recvmsg = mempool_alloc(sc->recv_io.mem.pool, KSMBD_DEFAULT_GFP);
2016 if (!recvmsg)
2017 goto err;
2018 recvmsg->socket = sc;
2019 recvmsg->sge.length = 0;
2020 list_add(&recvmsg->list, &sc->recv_io.free.list);
2021 }
2022
2023 return 0;
2024 err:
2025 smb_direct_destroy_pools(sc);
2026 return -ENOMEM;
2027 }
2028
2029 static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr)
2030 {
2031 /*
2032 * This could be split out of rdma_rw_init_qp()
2033 * and be a helper function next to rdma_rw_mr_factor()
2034 *
2035 * We can't check unlikely(rdma_rw_force_mr) here,
2036 * but that is most likely 0 anyway.
2037 */
2038 u32 factor;
2039
2040 WARN_ON_ONCE(attr->port_num == 0);
2041
2042 /*
2043 * Each context needs at least one RDMA READ or WRITE WR.
2044 *
2045 * For some hardware we might need more, eventually we should ask the
2046 * HCA driver for a multiplier here.
2047 */
2048 factor = 1;
2049
2050 /*
2051 * If the device needs MRs to perform RDMA READ or WRITE operations,
2052 * we'll need two additional MRs for the registrations and the
2053 * invalidation.
2054 */
2055 if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd)
2056 factor += 2; /* inv + reg */
2057
2058 return factor * attr->cap.max_rdma_ctxs;
2059 }
2060
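/*
 * Validate the requested work request and SGE counts against the device
 * limits, then allocate the PD, the send and receive completion queues
 * and the RC queue pair for this connection.
 */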
2061 static int smb_direct_create_qpair(struct smbdirect_socket *sc)
2062 {
2063 struct smbdirect_socket_parameters *sp = &sc->parameters;
2064 int ret;
2065 struct ib_qp_cap qp_cap;
2066 struct ib_qp_init_attr qp_attr;
2067 u32 max_send_wr;
2068 u32 rdma_send_wr;
2069
2070 /*
2071 * Note that {rdma,ib}_create_qp() will call
2072 * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0.
2073 * It will adjust cap->max_send_wr to the required
2074 * number of additional WRs for the RDMA RW operations.
2075 * It will cap cap->max_send_wr to the device limit.
2076 *
2077 * +1 for ib_drain_qp
2078 */
2079 qp_cap.max_send_wr = sp->send_credit_target + 1;
2080 qp_cap.max_recv_wr = sp->recv_credit_max + 1;
2081 qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
2082 qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
2083 qp_cap.max_inline_data = 0;
2084 qp_cap.max_rdma_ctxs = sc->rw_io.credits.max;
2085
2086 /*
2087 * Find out the number of max_send_wr
2088 * after rdma_rw_init_qp() adjusted it.
2089 *
2090 * We only do it on a temporary variable,
2091 * as rdma_create_qp() will trigger
2092 * rdma_rw_init_qp() again.
2093 */
2094 memset(&qp_attr, 0, sizeof(qp_attr));
2095 qp_attr.cap = qp_cap;
2096 qp_attr.port_num = sc->rdma.cm_id->port_num;
2097 rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr);
2098 max_send_wr = qp_cap.max_send_wr + rdma_send_wr;
2099
2100 if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe ||
2101 qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) {
2102 pr_err("Possible CQE overrun: max_send_wr %d\n",
2103 qp_cap.max_send_wr);
2104 pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
2105 IB_DEVICE_NAME_MAX,
2106 sc->ib.dev->name,
2107 sc->ib.dev->attrs.max_cqe,
2108 sc->ib.dev->attrs.max_qp_wr);
2109 pr_err("consider lowering send_credit_target = %d\n",
2110 sp->send_credit_target);
2111 return -EINVAL;
2112 }
2113
2114 if (qp_cap.max_rdma_ctxs &&
2115 (max_send_wr >= sc->ib.dev->attrs.max_cqe ||
2116 max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) {
2117 pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n",
2118 rdma_send_wr, qp_cap.max_send_wr, max_send_wr);
2119 pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
2120 IB_DEVICE_NAME_MAX,
2121 sc->ib.dev->name,
2122 sc->ib.dev->attrs.max_cqe,
2123 sc->ib.dev->attrs.max_qp_wr);
2124 pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n",
2125 sp->send_credit_target, qp_cap.max_rdma_ctxs);
2126 return -EINVAL;
2127 }
2128
2129 if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe ||
2130 qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) {
2131 pr_err("Possible CQE overrun: max_recv_wr %d\n",
2132 qp_cap.max_recv_wr);
2133 pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
2134 IB_DEVICE_NAME_MAX,
2135 sc->ib.dev->name,
2136 sc->ib.dev->attrs.max_cqe,
2137 sc->ib.dev->attrs.max_qp_wr);
2138 pr_err("consider lowering receive_credit_max = %d\n",
2139 sp->recv_credit_max);
2140 return -EINVAL;
2141 }
2142
2143 if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge ||
2144 qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) {
2145 pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n",
2146 IB_DEVICE_NAME_MAX,
2147 sc->ib.dev->name,
2148 sc->ib.dev->attrs.max_send_sge,
2149 sc->ib.dev->attrs.max_recv_sge);
2150 return -EINVAL;
2151 }
2152
2153 sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0);
2154 if (IS_ERR(sc->ib.pd)) {
2155 pr_err("Can't create RDMA PD\n");
2156 ret = PTR_ERR(sc->ib.pd);
2157 sc->ib.pd = NULL;
2158 return ret;
2159 }
2160
2161 sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc,
2162 max_send_wr,
2163 IB_POLL_WORKQUEUE);
2164 if (IS_ERR(sc->ib.send_cq)) {
2165 pr_err("Can't create RDMA send CQ\n");
2166 ret = PTR_ERR(sc->ib.send_cq);
2167 sc->ib.send_cq = NULL;
2168 goto err;
2169 }
2170
2171 sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc,
2172 qp_cap.max_recv_wr,
2173 IB_POLL_WORKQUEUE);
2174 if (IS_ERR(sc->ib.recv_cq)) {
2175 pr_err("Can't create RDMA recv CQ\n");
2176 ret = PTR_ERR(sc->ib.recv_cq);
2177 sc->ib.recv_cq = NULL;
2178 goto err;
2179 }
2180
2181 /*
2182 * We reset completely here, as the above use
2183 * was only temporary to calculate max_send_wr
2184 * and rdma_send_wr.
2185 *
2186 * rdma_create_qp() will trigger rdma_rw_init_qp()
2187 * again if max_rdma_ctxs is not 0.
2188 */
2189 memset(&qp_attr, 0, sizeof(qp_attr));
2190 qp_attr.event_handler = smb_direct_qpair_handler;
2191 qp_attr.qp_context = sc;
2192 qp_attr.cap = qp_cap;
2193 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
2194 qp_attr.qp_type = IB_QPT_RC;
2195 qp_attr.send_cq = sc->ib.send_cq;
2196 qp_attr.recv_cq = sc->ib.recv_cq;
2197 qp_attr.port_num = ~0;
2198
2199 ret = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr);
2200 if (ret) {
2201 pr_err("Can't create RDMA QP: %d\n", ret);
2202 goto err;
2203 }
2204
2205 sc->ib.qp = sc->rdma.cm_id->qp;
2206 sc->rdma.cm_id->event_handler = smb_direct_cm_handler;
2207
2208 return 0;
2209 err:
2210 if (sc->ib.qp) {
2211 sc->ib.qp = NULL;
2212 rdma_destroy_qp(sc->rdma.cm_id);
2213 }
2214 if (sc->ib.recv_cq) {
2215 ib_destroy_cq(sc->ib.recv_cq);
2216 sc->ib.recv_cq = NULL;
2217 }
2218 if (sc->ib.send_cq) {
2219 ib_destroy_cq(sc->ib.send_cq);
2220 sc->ib.send_cq = NULL;
2221 }
2222 if (sc->ib.pd) {
2223 ib_dealloc_pd(sc->ib.pd);
2224 sc->ib.pd = NULL;
2225 }
2226 return ret;
2227 }
2228
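/*
 * Transport prepare callback: wait for the client's negotiate request,
 * adopt the negotiated sizes and credit targets, refill the receive
 * queue and send the negotiate response.
 */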
2229 static int smb_direct_prepare(struct ksmbd_transport *t)
2230 {
2231 struct smb_direct_transport *st = SMBD_TRANS(t);
2232 struct smbdirect_socket *sc = &st->socket;
2233 struct smbdirect_socket_parameters *sp = &sc->parameters;
2234 struct smbdirect_recv_io *recvmsg;
2235 struct smbdirect_negotiate_req *req;
2236 unsigned long flags;
2237 int ret;
2238
2239 /*
2240 * We are waiting to pass the following states:
2241 *
2242 * SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED
2243 * SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING
2244 * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED
2245 *
2246 * To finally get to SMBDIRECT_SOCKET_NEGOTIATE_RUNNING
2247 * in order to continue below.
2248 *
2249 * Everything else is unexpected and an error.
2250 */
2251 ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
2252 ret = wait_event_interruptible_timeout(sc->status_wait,
2253 sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED &&
2254 sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING &&
2255 sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED,
2256 msecs_to_jiffies(sp->negotiate_timeout_msec));
2257 if (ret <= 0 || sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)
2258 return ret < 0 ? ret : -ETIMEDOUT;
2259
2260 recvmsg = get_first_reassembly(sc);
2261 if (!recvmsg)
2262 return -ECONNABORTED;
2263
2264 ret = smb_direct_check_recvmsg(recvmsg);
2265 if (ret)
2266 goto put;
2267
2268 req = (struct smbdirect_negotiate_req *)recvmsg->packet;
2269 sp->max_recv_size = min_t(int, sp->max_recv_size,
2270 le32_to_cpu(req->preferred_send_size));
2271 sp->max_send_size = min_t(int, sp->max_send_size,
2272 le32_to_cpu(req->max_receive_size));
2273 sp->max_fragmented_send_size =
2274 le32_to_cpu(req->max_fragmented_size);
2275 sp->max_fragmented_recv_size =
2276 (sp->recv_credit_max * sp->max_recv_size) / 2;
2277 sc->recv_io.credits.target = le16_to_cpu(req->credits_requested);
2278 sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max);
2279 sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1);
2280
2281 put:
2282 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
2283 sc->recv_io.reassembly.queue_length--;
2284 list_del(&recvmsg->list);
2285 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
2286 put_recvmsg(sc, recvmsg);
2287
2288 if (ret == -ECONNABORTED)
2289 return ret;
2290
2291 if (ret)
2292 goto respond;
2293
2294 /*
2295 * Negotiation succeeded, so we need to refill the recv queue.
2296 * We do that with sc->idle.immediate_work still being disabled
2297 * via smbdirect_socket_init(), so that queue_work(sc->workqueue,
2298 * &sc->idle.immediate_work) in smb_direct_post_recv_credits()
2299 * is a no-op.
2300 *
2301 * The message that grants the credits to the client is
2302 * the negotiate response.
2303 */
2304 INIT_WORK(&sc->recv_io.posted.refill_work, smb_direct_post_recv_credits);
2305 smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work);
2306 if (unlikely(sc->first_error))
2307 return sc->first_error;
2308 INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work);
2309
2310 respond:
2311 ret = smb_direct_send_negotiate_response(sc, ret);
2312
2313 return ret;
2314 }
2315
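/*
 * Per-connection setup sequence: parameters, memory pools, queue pair
 * and finally the SMBDirect negotiation.
 */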
2316 static int smb_direct_connect(struct smbdirect_socket *sc)
2317 {
2318 int ret;
2319
2320 ret = smb_direct_init_params(sc);
2321 if (ret) {
2322 pr_err("Can't configure RDMA parameters\n");
2323 return ret;
2324 }
2325
2326 ret = smb_direct_create_pools(sc);
2327 if (ret) {
2328 pr_err("Can't init RDMA pool: %d\n", ret);
2329 return ret;
2330 }
2331
2332 ret = smb_direct_create_qpair(sc);
2333 if (ret) {
2334 pr_err("Can't accept RDMA client: %d\n", ret);
2335 return ret;
2336 }
2337
2338 ret = smb_direct_prepare_negotiation(sc);
2339 if (ret) {
2340 pr_err("Can't negotiate: %d\n", ret);
2341 return ret;
2342 }
2343 return 0;
2344 }
2345
2346 static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
2347 {
2348 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
2349 return false;
2350 if (attrs->max_fast_reg_page_list_len == 0)
2351 return false;
2352 return true;
2353 }
2354
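/*
 * Handle an incoming RDMA connection request: require FRWR support,
 * allocate a transport, negotiate initiator_depth/responder_resources
 * (including the legacy iWARP MPA v1 private-data format), set up the
 * connection and start the ksmbd connection handler thread.
 */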
2355 static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id,
2356 struct rdma_cm_event *event)
2357 {
2358 struct smb_direct_transport *t;
2359 struct smbdirect_socket *sc;
2360 struct smbdirect_socket_parameters *sp;
2361 struct task_struct *handler;
2362 u8 peer_initiator_depth;
2363 u8 peer_responder_resources;
2364 int ret;
2365
2366 if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
2367 ksmbd_debug(RDMA,
2368 "Fast Registration Work Requests is not supported. device capabilities=%llx\n",
2369 new_cm_id->device->attrs.device_cap_flags);
2370 return -EPROTONOSUPPORT;
2371 }
2372
2373 t = alloc_transport(new_cm_id);
2374 if (!t)
2375 return -ENOMEM;
2376 sc = &t->socket;
2377 sp = &sc->parameters;
2378
2379 peer_initiator_depth = event->param.conn.initiator_depth;
2380 peer_responder_resources = event->param.conn.responder_resources;
2381 if (rdma_protocol_iwarp(new_cm_id->device, new_cm_id->port_num) &&
2382 event->param.conn.private_data_len == 8) {
2383 /*
2384 * Legacy clients with only iWarp MPA v1 support
2385 * need a private blob in order to negotiate
2386 * the IRD/ORD values.
2387 */
2388 const __be32 *ird_ord_hdr = event->param.conn.private_data;
2389 u32 ird32 = be32_to_cpu(ird_ord_hdr[0]);
2390 u32 ord32 = be32_to_cpu(ird_ord_hdr[1]);
2391
2392 /*
2393 * cifs.ko sends the legacy IRD/ORD negotiation
2394 * even if iWarp MPA v2 was used.
2395 *
2396 * Here we check that the values match and only
2397 * mark the client as legacy if they don't match.
2398 */
2399 if ((u32)event->param.conn.initiator_depth != ird32 ||
2400 (u32)event->param.conn.responder_resources != ord32) {
2401 /*
2402 * There are broken clients (old cifs.ko)
2403 * that use little-endian values, and
2404 * struct rdma_conn_param only uses u8
2405 * for initiator_depth and responder_resources,
2406 * so we truncate the values to U8_MAX.
2407 *
2408 * smb_direct_accept_client() will then
2409 * do the real negotiation in order to
2410 * select the minimum between client and
2411 * server.
2412 */
2413 ird32 = min_t(u32, ird32, U8_MAX);
2414 ord32 = min_t(u32, ord32, U8_MAX);
2415
2416 sc->rdma.legacy_iwarp = true;
2417 peer_initiator_depth = (u8)ird32;
2418 peer_responder_resources = (u8)ord32;
2419 }
2420 }
2421
2422 /*
2423 * First set what we as the server are able to support
2424 */
2425 sp->initiator_depth = min_t(u8, sp->initiator_depth,
2426 new_cm_id->device->attrs.max_qp_rd_atom);
2427
2428 /*
2429 * Negotiate the values by taking the minimum
2430 * between client and server if the client provided
2431 * non-zero values.
2432 */
2433 if (peer_initiator_depth != 0)
2434 sp->initiator_depth = min_t(u8, sp->initiator_depth,
2435 peer_initiator_depth);
2436 if (peer_responder_resources != 0)
2437 sp->responder_resources = min_t(u8, sp->responder_resources,
2438 peer_responder_resources);
2439
2440 ret = smb_direct_connect(sc);
2441 if (ret)
2442 goto out_err;
2443
2444 handler = kthread_run(ksmbd_conn_handler_loop,
2445 KSMBD_TRANS(t)->conn, "ksmbd:r%u",
2446 smb_direct_port);
2447 if (IS_ERR(handler)) {
2448 ret = PTR_ERR(handler);
2449 pr_err("Can't start thread\n");
2450 goto out_err;
2451 }
2452
2453 return 0;
2454 out_err:
2455 free_transport(t);
2456 return ret;
2457 }
2458
2459 static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
2460 struct rdma_cm_event *event)
2461 {
2462 switch (event->event) {
2463 case RDMA_CM_EVENT_CONNECT_REQUEST: {
2464 int ret = smb_direct_handle_connect_request(cm_id, event);
2465
2466 if (ret) {
2467 pr_err("Can't create transport: %d\n", ret);
2468 return ret;
2469 }
2470
2471 ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
2472 cm_id);
2473 break;
2474 }
2475 default:
2476 pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
2477 cm_id, rdma_event_msg(event->event), event->event);
2478 break;
2479 }
2480 return 0;
2481 }
2482
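/*
 * Create the listening RDMA CM id, bind it to INADDR_ANY on the given
 * port and start listening with a backlog of 10.
 */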
2483 static int smb_direct_listen(int port)
2484 {
2485 int ret;
2486 struct rdma_cm_id *cm_id;
2487 struct sockaddr_in sin = {
2488 .sin_family = AF_INET,
2489 .sin_addr.s_addr = htonl(INADDR_ANY),
2490 .sin_port = htons(port),
2491 };
2492
2493 cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
2494 &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
2495 if (IS_ERR(cm_id)) {
2496 pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
2497 return PTR_ERR(cm_id);
2498 }
2499
2500 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
2501 if (ret) {
2502 pr_err("Can't bind: %d\n", ret);
2503 goto err;
2504 }
2505
2506 smb_direct_listener.cm_id = cm_id;
2507
2508 ret = rdma_listen(cm_id, 10);
2509 if (ret) {
2510 pr_err("Can't listen: %d\n", ret);
2511 goto err;
2512 }
2513 return 0;
2514 err:
2515 smb_direct_listener.cm_id = NULL;
2516 rdma_destroy_id(cm_id);
2517 return ret;
2518 }
2519
2520 static int smb_direct_ib_client_add(struct ib_device *ib_dev)
2521 {
2522 struct smb_direct_device *smb_dev;
2523
2524 /* Use port 5445 if the device type is iWARP (not InfiniBand) */
2525 if (ib_dev->node_type != RDMA_NODE_IB_CA)
2526 smb_direct_port = SMB_DIRECT_PORT_IWARP;
2527
2528 if (!rdma_frwr_is_supported(&ib_dev->attrs))
2529 return 0;
2530
2531 smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP);
2532 if (!smb_dev)
2533 return -ENOMEM;
2534 smb_dev->ib_dev = ib_dev;
2535
2536 write_lock(&smb_direct_device_lock);
2537 list_add(&smb_dev->list, &smb_direct_device_list);
2538 write_unlock(&smb_direct_device_lock);
2539
2540 ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
2541 return 0;
2542 }
2543
2544 static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
2545 void *client_data)
2546 {
2547 struct smb_direct_device *smb_dev, *tmp;
2548
2549 write_lock(&smb_direct_device_lock);
2550 list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
2551 if (smb_dev->ib_dev == ib_dev) {
2552 list_del(&smb_dev->list);
2553 kfree(smb_dev);
2554 break;
2555 }
2556 }
2557 write_unlock(&smb_direct_device_lock);
2558 }
2559
2560 static struct ib_client smb_direct_ib_client = {
2561 .name = "ksmbd_smb_direct_ib",
2562 .add = smb_direct_ib_client_add,
2563 .remove = smb_direct_ib_client_remove,
2564 };
2565
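/*
 * Register the IB client that tracks RDMA-capable devices, create the
 * high-priority workqueue used for sending (including credit grants),
 * and start the SMBDirect listener.
 */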
2566 int ksmbd_rdma_init(void)
2567 {
2568 int ret;
2569
2570 smb_direct_listener.cm_id = NULL;
2571
2572 ret = ib_register_client(&smb_direct_ib_client);
2573 if (ret) {
2574 pr_err("failed to ib_register_client\n");
2575 return ret;
2576 }
2577
2578 /* When a client is running out of send credits, the credits are
2579 * granted by the server sending a packet using this queue.
2580 * This avoids the situation where a client cannot send packets
2581 * for lack of credits.
2582 */
2583 smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
2584 WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_PERCPU,
2585 0);
2586 if (!smb_direct_wq)
2587 return -ENOMEM;
2588
2589 ret = smb_direct_listen(smb_direct_port);
2590 if (ret) {
2591 destroy_workqueue(smb_direct_wq);
2592 smb_direct_wq = NULL;
2593 pr_err("Can't listen: %d\n", ret);
2594 return ret;
2595 }
2596
2597 ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
2598 smb_direct_listener.cm_id);
2599 return 0;
2600 }
2601
2602 void ksmbd_rdma_stop_listening(void)
2603 {
2604 if (!smb_direct_listener.cm_id)
2605 return;
2606
2607 ib_unregister_client(&smb_direct_ib_client);
2608 rdma_destroy_id(smb_direct_listener.cm_id);
2609
2610 smb_direct_listener.cm_id = NULL;
2611 }
2612
2613 void ksmbd_rdma_destroy(void)
2614 {
2615 if (smb_direct_wq) {
2616 destroy_workqueue(smb_direct_wq);
2617 smb_direct_wq = NULL;
2618 }
2619 }
2620
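/*
 * Check whether a net_device is backed by one of the tracked RDMA
 * devices (via ib_device_get_netdev() on each port); otherwise fall back
 * to ib_device_get_by_netdev() plus the FRWR capability check.
 */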
2621 static bool ksmbd_find_rdma_capable_netdev(struct net_device *netdev)
2622 {
2623 struct smb_direct_device *smb_dev;
2624 int i;
2625 bool rdma_capable = false;
2626
2627 read_lock(&smb_direct_device_lock);
2628 list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
2629 for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
2630 struct net_device *ndev;
2631
2632 ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
2633 if (!ndev)
2634 continue;
2635
2636 if (ndev == netdev) {
2637 dev_put(ndev);
2638 rdma_capable = true;
2639 goto out;
2640 }
2641 dev_put(ndev);
2642 }
2643 }
2644 out:
2645 read_unlock(&smb_direct_device_lock);
2646
2647 if (!rdma_capable) {
2648 struct ib_device *ibdev;
2649
2650 ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
2651 if (ibdev) {
2652 rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
2653 ib_device_put(ibdev);
2654 }
2655 }
2656
2657 ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
2658 netdev->name, str_true_false(rdma_capable));
2659
2660 return rdma_capable;
2661 }
2662
2663 bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
2664 {
2665 struct net_device *lower_dev;
2666 struct list_head *iter;
2667
2668 if (ksmbd_find_rdma_capable_netdev(netdev))
2669 return true;
2670
2671 /* check if netdev is bridge or VLAN */
2672 if (netif_is_bridge_master(netdev) ||
2673 netdev->priv_flags & IFF_802_1Q_VLAN)
2674 netdev_for_each_lower_dev(netdev, lower_dev, iter)
2675 if (ksmbd_find_rdma_capable_netdev(lower_dev))
2676 return true;
2677
2678 /* check if netdev is IPoIB safely without layer violation */
2679 if (netdev->type == ARPHRD_INFINIBAND)
2680 return true;
2681
2682 return false;
2683 }
2684
2685 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
2686 .prepare = smb_direct_prepare,
2687 .disconnect = smb_direct_disconnect,
2688 .shutdown = smb_direct_shutdown,
2689 .writev = smb_direct_writev,
2690 .read = smb_direct_read,
2691 .rdma_read = smb_direct_rdma_read,
2692 .rdma_write = smb_direct_rdma_write,
2693 .free_transport = smb_direct_free_transport,
2694 };
2695