1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017, Microsoft Corporation.
4 * Copyright (C) 2018, LG Electronics.
5 *
6 * Author(s): Long Li <longli@microsoft.com>,
7 * Hyunchul Lee <hyc.lee@gmail.com>
8 */
9
10 #define SUBMOD_NAME "smb_direct"
11
12 #include <linux/kthread.h>
13 #include <linux/list.h>
14 #include <linux/mempool.h>
15 #include <linux/highmem.h>
16 #include <linux/scatterlist.h>
17 #include <linux/string_choices.h>
18 #include <rdma/ib_verbs.h>
19 #include <rdma/rdma_cm.h>
20 #include <rdma/rw.h>
21
22 #include "glob.h"
23 #include "connection.h"
24 #include "smb_common.h"
25 #include "../common/smb2status.h"
26 #include "transport_rdma.h"
27
28 #define SMB_DIRECT_PORT_IWARP 5445
29 #define SMB_DIRECT_PORT_INFINIBAND 445
30
31 #define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
32
33 /* SMB_DIRECT negotiation timeout in seconds */
34 #define SMB_DIRECT_NEGOTIATE_TIMEOUT 120
35
36 #define SMB_DIRECT_MAX_SEND_SGES 6
37 #define SMB_DIRECT_MAX_RECV_SGES 1
38
39 /*
40 * Default maximum number of outstanding RDMA read/write operations on this connection.
41 * This value may be reduced during QP creation if the hardware limit is lower.
42 */
43 #define SMB_DIRECT_CM_INITIATOR_DEPTH 8
44
45 /* Maximum number of retries on data transfer operations */
46 #define SMB_DIRECT_CM_RETRY 6
47 /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
48 #define SMB_DIRECT_CM_RNR_RETRY 0
49
50 /*
51 * User configurable initial values per SMB_DIRECT transport connection
52 * as defined in [MS-SMBD] 3.1.1.1
53 * These may change after SMB_DIRECT negotiation
54 */
55
56 /* Use port 445 as the SMB Direct port by default */
57 static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
58
59 /* The maximum number of credits the local peer grants to the remote peer */
60 static int smb_direct_receive_credit_max = 255;
61
62 /* The number of send credits requested from the remote peer */
63 static int smb_direct_send_credit_target = 255;
64
65 /* The maximum size of a single message that can be sent to the remote peer */
66 static int smb_direct_max_send_size = 1364;
67
68 /* The maximum fragmented upper-layer payload receive size supported */
69 static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
70
71 /* The maximum single-message size which can be received */
72 static int smb_direct_max_receive_size = 1364;
73
74 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
75
76 static LIST_HEAD(smb_direct_device_list);
77 static DEFINE_RWLOCK(smb_direct_device_lock);
78
79 struct smb_direct_device {
80 struct ib_device *ib_dev;
81 struct list_head list;
82 };
83
84 static struct smb_direct_listener {
85 struct rdma_cm_id *cm_id;
86 } smb_direct_listener;
87
88 static struct workqueue_struct *smb_direct_wq;
89
90 enum smb_direct_status {
91 SMB_DIRECT_CS_NEW = 0,
92 SMB_DIRECT_CS_CONNECTED,
93 SMB_DIRECT_CS_DISCONNECTING,
94 SMB_DIRECT_CS_DISCONNECTED,
95 };
96
97 struct smb_direct_transport {
98 struct ksmbd_transport transport;
99
100 enum smb_direct_status status;
101 bool full_packet_received;
102 wait_queue_head_t wait_status;
103
104 struct rdma_cm_id *cm_id;
105 struct ib_cq *send_cq;
106 struct ib_cq *recv_cq;
107 struct ib_pd *pd;
108 struct ib_qp *qp;
109
110 int max_send_size;
111 int max_recv_size;
112 int max_fragmented_send_size;
113 int max_fragmented_recv_size;
114 int max_rdma_rw_size;
115
116 spinlock_t reassembly_queue_lock;
117 struct list_head reassembly_queue;
118 int reassembly_data_length;
119 int reassembly_queue_length;
120 int first_entry_offset;
121 wait_queue_head_t wait_reassembly_queue;
122
123 spinlock_t receive_credit_lock;
124 int recv_credits;
125 int count_avail_recvmsg;
126 int recv_credit_max;
127 int recv_credit_target;
128
129 spinlock_t recvmsg_queue_lock;
130 struct list_head recvmsg_queue;
131
132 spinlock_t empty_recvmsg_queue_lock;
133 struct list_head empty_recvmsg_queue;
134
135 int send_credit_target;
136 atomic_t send_credits;
137 spinlock_t lock_new_recv_credits;
138 int new_recv_credits;
139 int max_rw_credits;
140 int pages_per_rw_credit;
141 atomic_t rw_credits;
142
143 wait_queue_head_t wait_send_credits;
144 wait_queue_head_t wait_rw_credits;
145
146 mempool_t *sendmsg_mempool;
147 struct kmem_cache *sendmsg_cache;
148 mempool_t *recvmsg_mempool;
149 struct kmem_cache *recvmsg_cache;
150
151 wait_queue_head_t wait_send_pending;
152 atomic_t send_pending;
153
154 struct delayed_work post_recv_credits_work;
155 struct work_struct send_immediate_work;
156 struct work_struct disconnect_work;
157
158 bool negotiation_requested;
159 };
160
161 #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
162 #define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \
163 struct smb_direct_transport, transport))
164 enum {
165 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
166 SMB_DIRECT_MSG_DATA_TRANSFER
167 };
168
169 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
170
171 struct smb_direct_send_ctx {
172 struct list_head msg_list;
173 int wr_cnt;
174 bool need_invalidate_rkey;
175 unsigned int remote_key;
176 };
177
178 struct smb_direct_sendmsg {
179 struct smb_direct_transport *transport;
180 struct ib_send_wr wr;
181 struct list_head list;
182 int num_sge;
183 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES];
184 struct ib_cqe cqe;
185 u8 packet[];
186 };
187
188 struct smb_direct_recvmsg {
189 struct smb_direct_transport *transport;
190 struct list_head list;
191 int type;
192 struct ib_sge sge;
193 struct ib_cqe cqe;
194 bool first_segment;
195 u8 packet[];
196 };
197
198 struct smb_direct_rdma_rw_msg {
199 struct smb_direct_transport *t;
200 struct ib_cqe cqe;
201 int status;
202 struct completion *completion;
203 struct list_head list;
204 struct rdma_rw_ctx rw_ctx;
205 struct sg_table sgt;
206 struct scatterlist sg_list[];
207 };
208
209 void init_smbd_max_io_size(unsigned int sz)
210 {
211 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
212 smb_direct_max_read_write_size = sz;
213 }
214
215 unsigned int get_smbd_max_read_write_size(void)
216 {
217 return smb_direct_max_read_write_size;
218 }
219
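/*
 * Number of pages spanned by @size bytes starting at @buf; partial
 * first and last pages each count as a full page.
 */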
220 static inline int get_buf_page_count(void *buf, int size)
221 {
222 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
223 (uintptr_t)buf / PAGE_SIZE;
224 }
225
226 static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
227 static void smb_direct_post_recv_credits(struct work_struct *work);
228 static int smb_direct_post_send_data(struct smb_direct_transport *t,
229 struct smb_direct_send_ctx *send_ctx,
230 struct kvec *iov, int niov,
231 int remaining_data_length);
232
233 static inline struct smb_direct_transport *
234 smb_trans_direct_transfort(struct ksmbd_transport *t)
235 {
236 return container_of(t, struct smb_direct_transport, transport);
237 }
238
239 static inline void
240 *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
241 {
242 return (void *)recvmsg->packet;
243 }
244
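/*
 * Repost receive buffers once the remaining receive credits have dropped
 * to 1/8 of the configured maximum and at least a quarter as many
 * recvmsg buffers are available to repost.
 */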
245 static inline bool is_receive_credit_post_required(int receive_credits,
246 int avail_recvmsg_count)
247 {
248 return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
249 avail_recvmsg_count >= (receive_credits >> 2);
250 }
251
252 static struct
253 smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
254 {
255 struct smb_direct_recvmsg *recvmsg = NULL;
256
257 spin_lock(&t->recvmsg_queue_lock);
258 if (!list_empty(&t->recvmsg_queue)) {
259 recvmsg = list_first_entry(&t->recvmsg_queue,
260 struct smb_direct_recvmsg,
261 list);
262 list_del(&recvmsg->list);
263 }
264 spin_unlock(&t->recvmsg_queue_lock);
265 return recvmsg;
266 }
267
268 static void put_recvmsg(struct smb_direct_transport *t,
269 struct smb_direct_recvmsg *recvmsg)
270 {
271 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
272 recvmsg->sge.length, DMA_FROM_DEVICE);
273
274 spin_lock(&t->recvmsg_queue_lock);
275 list_add(&recvmsg->list, &t->recvmsg_queue);
276 spin_unlock(&t->recvmsg_queue_lock);
277 }
278
279 static struct
280 smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
281 {
282 struct smb_direct_recvmsg *recvmsg = NULL;
283
284 spin_lock(&t->empty_recvmsg_queue_lock);
285 if (!list_empty(&t->empty_recvmsg_queue)) {
286 recvmsg = list_first_entry(&t->empty_recvmsg_queue,
287 struct smb_direct_recvmsg, list);
288 list_del(&recvmsg->list);
289 }
290 spin_unlock(&t->empty_recvmsg_queue_lock);
291 return recvmsg;
292 }
293
294 static void put_empty_recvmsg(struct smb_direct_transport *t,
295 struct smb_direct_recvmsg *recvmsg)
296 {
297 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
298 recvmsg->sge.length, DMA_FROM_DEVICE);
299
300 spin_lock(&t->empty_recvmsg_queue_lock);
301 list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
302 spin_unlock(&t->empty_recvmsg_queue_lock);
303 }
304
305 static void enqueue_reassembly(struct smb_direct_transport *t,
306 struct smb_direct_recvmsg *recvmsg,
307 int data_length)
308 {
309 spin_lock(&t->reassembly_queue_lock);
310 list_add_tail(&recvmsg->list, &t->reassembly_queue);
311 t->reassembly_queue_length++;
312 /*
313 * Make sure reassembly_data_length is updated after list and
314 * reassembly_queue_length are updated. On the dequeue side
315 * reassembly_data_length is checked without a lock to determine
316 * if reassembly_queue_length and the list are up to date
317 */
318 virt_wmb();
319 t->reassembly_data_length += data_length;
320 spin_unlock(&t->reassembly_queue_lock);
321 }
322
323 static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
324 {
325 if (!list_empty(&t->reassembly_queue))
326 return list_first_entry(&t->reassembly_queue,
327 struct smb_direct_recvmsg, list);
328 else
329 return NULL;
330 }
331
332 static void smb_direct_disconnect_rdma_work(struct work_struct *work)
333 {
334 struct smb_direct_transport *t =
335 container_of(work, struct smb_direct_transport,
336 disconnect_work);
337
338 if (t->status == SMB_DIRECT_CS_CONNECTED) {
339 t->status = SMB_DIRECT_CS_DISCONNECTING;
340 rdma_disconnect(t->cm_id);
341 }
342 }
343
344 static void
345 smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
346 {
347 if (t->status == SMB_DIRECT_CS_CONNECTED)
348 queue_work(smb_direct_wq, &t->disconnect_work);
349 }
350
351 static void smb_direct_send_immediate_work(struct work_struct *work)
352 {
353 struct smb_direct_transport *t = container_of(work,
354 struct smb_direct_transport, send_immediate_work);
355
356 if (t->status != SMB_DIRECT_CS_CONNECTED)
357 return;
358
359 smb_direct_post_send_data(t, NULL, NULL, 0, 0);
360 }
361
362 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
363 {
364 struct smb_direct_transport *t;
365 struct ksmbd_conn *conn;
366
367 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP);
368 if (!t)
369 return NULL;
370
371 t->cm_id = cm_id;
372 cm_id->context = t;
373
374 t->status = SMB_DIRECT_CS_NEW;
375 init_waitqueue_head(&t->wait_status);
376
377 spin_lock_init(&t->reassembly_queue_lock);
378 INIT_LIST_HEAD(&t->reassembly_queue);
379 t->reassembly_data_length = 0;
380 t->reassembly_queue_length = 0;
381 init_waitqueue_head(&t->wait_reassembly_queue);
382 init_waitqueue_head(&t->wait_send_credits);
383 init_waitqueue_head(&t->wait_rw_credits);
384
385 spin_lock_init(&t->receive_credit_lock);
386 spin_lock_init(&t->recvmsg_queue_lock);
387 INIT_LIST_HEAD(&t->recvmsg_queue);
388
389 spin_lock_init(&t->empty_recvmsg_queue_lock);
390 INIT_LIST_HEAD(&t->empty_recvmsg_queue);
391
392 init_waitqueue_head(&t->wait_send_pending);
393 atomic_set(&t->send_pending, 0);
394
395 spin_lock_init(&t->lock_new_recv_credits);
396
397 INIT_DELAYED_WORK(&t->post_recv_credits_work,
398 smb_direct_post_recv_credits);
399 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
400 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
401
402 conn = ksmbd_conn_alloc();
403 if (!conn)
404 goto err;
405 conn->transport = KSMBD_TRANS(t);
406 KSMBD_TRANS(t)->conn = conn;
407 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
408 return t;
409 err:
410 kfree(t);
411 return NULL;
412 }
413
414 static void smb_direct_free_transport(struct ksmbd_transport *kt)
415 {
416 kfree(SMBD_TRANS(kt));
417 }
418
419 static void free_transport(struct smb_direct_transport *t)
420 {
421 struct smb_direct_recvmsg *recvmsg;
422
423 wake_up_interruptible(&t->wait_send_credits);
424
425 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
426 wait_event(t->wait_send_pending,
427 atomic_read(&t->send_pending) == 0);
428
429 cancel_work_sync(&t->disconnect_work);
430 cancel_delayed_work_sync(&t->post_recv_credits_work);
431 cancel_work_sync(&t->send_immediate_work);
432
433 if (t->qp) {
434 ib_drain_qp(t->qp);
435 ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
436 ib_destroy_qp(t->qp);
437 }
438
439 ksmbd_debug(RDMA, "drain the reassembly queue\n");
440 do {
441 spin_lock(&t->reassembly_queue_lock);
442 recvmsg = get_first_reassembly(t);
443 if (recvmsg) {
444 list_del(&recvmsg->list);
445 spin_unlock(&t->reassembly_queue_lock);
446 put_recvmsg(t, recvmsg);
447 } else {
448 spin_unlock(&t->reassembly_queue_lock);
449 }
450 } while (recvmsg);
451 t->reassembly_data_length = 0;
452
453 if (t->send_cq)
454 ib_free_cq(t->send_cq);
455 if (t->recv_cq)
456 ib_free_cq(t->recv_cq);
457 if (t->pd)
458 ib_dealloc_pd(t->pd);
459 if (t->cm_id)
460 rdma_destroy_id(t->cm_id);
461
462 smb_direct_destroy_pools(t);
463 ksmbd_conn_free(KSMBD_TRANS(t)->conn);
464 }
465
466 static struct smb_direct_sendmsg
467 *smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
468 {
469 struct smb_direct_sendmsg *msg;
470
471 msg = mempool_alloc(t->sendmsg_mempool, KSMBD_DEFAULT_GFP);
472 if (!msg)
473 return ERR_PTR(-ENOMEM);
474 msg->transport = t;
475 INIT_LIST_HEAD(&msg->list);
476 msg->num_sge = 0;
477 return msg;
478 }
479
480 static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
481 struct smb_direct_sendmsg *msg)
482 {
483 int i;
484
485 if (msg->num_sge > 0) {
486 ib_dma_unmap_single(t->cm_id->device,
487 msg->sge[0].addr, msg->sge[0].length,
488 DMA_TO_DEVICE);
489 for (i = 1; i < msg->num_sge; i++)
490 ib_dma_unmap_page(t->cm_id->device,
491 msg->sge[i].addr, msg->sge[i].length,
492 DMA_TO_DEVICE);
493 }
494 mempool_free(msg, t->sendmsg_mempool);
495 }
496
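/*
 * Sanity-check a received message: a negotiate request must offer
 * protocol version 0x0100 and reasonable size limits, otherwise the
 * connection is rejected.
 */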
497 static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
498 {
499 switch (recvmsg->type) {
500 case SMB_DIRECT_MSG_DATA_TRANSFER: {
501 struct smb_direct_data_transfer *req =
502 (struct smb_direct_data_transfer *)recvmsg->packet;
503 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
504 + le32_to_cpu(req->data_offset));
505 ksmbd_debug(RDMA,
506 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
507 le16_to_cpu(req->credits_granted),
508 le16_to_cpu(req->credits_requested),
509 req->data_length, req->remaining_data_length,
510 hdr->ProtocolId, hdr->Command);
511 break;
512 }
513 case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
514 struct smb_direct_negotiate_req *req =
515 (struct smb_direct_negotiate_req *)recvmsg->packet;
516 ksmbd_debug(RDMA,
517 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
518 le16_to_cpu(req->min_version),
519 le16_to_cpu(req->max_version),
520 le16_to_cpu(req->credits_requested),
521 le32_to_cpu(req->preferred_send_size),
522 le32_to_cpu(req->max_receive_size),
523 le32_to_cpu(req->max_fragmented_size));
524 if (le16_to_cpu(req->min_version) > 0x0100 ||
525 le16_to_cpu(req->max_version) < 0x0100)
526 return -EOPNOTSUPP;
527 if (le16_to_cpu(req->credits_requested) <= 0 ||
528 le32_to_cpu(req->max_receive_size) <= 128 ||
529 le32_to_cpu(req->max_fragmented_size) <=
530 128 * 1024)
531 return -ECONNABORTED;
532
533 break;
534 }
535 default:
536 return -EINVAL;
537 }
538 return 0;
539 }
540
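/*
 * Receive completion handler. Negotiate requests wake up the negotiation
 * waiter; data-transfer messages are queued for reassembly, the peer's
 * credit grants and requests are recorded, and more receive buffers are
 * reposted when credits run low.
 */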
541 static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
542 {
543 struct smb_direct_recvmsg *recvmsg;
544 struct smb_direct_transport *t;
545
546 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
547 t = recvmsg->transport;
548
549 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
550 if (wc->status != IB_WC_WR_FLUSH_ERR) {
551 pr_err("Recv error. status='%s (%d)' opcode=%d\n",
552 ib_wc_status_msg(wc->status), wc->status,
553 wc->opcode);
554 smb_direct_disconnect_rdma_connection(t);
555 }
556 put_empty_recvmsg(t, recvmsg);
557 return;
558 }
559
560 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
561 ib_wc_status_msg(wc->status), wc->status,
562 wc->opcode);
563
564 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
565 recvmsg->sge.length, DMA_FROM_DEVICE);
566
567 switch (recvmsg->type) {
568 case SMB_DIRECT_MSG_NEGOTIATE_REQ:
569 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
570 put_empty_recvmsg(t, recvmsg);
571 return;
572 }
573 t->negotiation_requested = true;
574 t->full_packet_received = true;
575 t->status = SMB_DIRECT_CS_CONNECTED;
576 enqueue_reassembly(t, recvmsg, 0);
577 wake_up_interruptible(&t->wait_status);
578 break;
579 case SMB_DIRECT_MSG_DATA_TRANSFER: {
580 struct smb_direct_data_transfer *data_transfer =
581 (struct smb_direct_data_transfer *)recvmsg->packet;
582 unsigned int data_length;
583 int avail_recvmsg_count, receive_credits;
584
585 if (wc->byte_len <
586 offsetof(struct smb_direct_data_transfer, padding)) {
587 put_empty_recvmsg(t, recvmsg);
588 return;
589 }
590
591 data_length = le32_to_cpu(data_transfer->data_length);
592 if (data_length) {
593 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
594 (u64)data_length) {
595 put_empty_recvmsg(t, recvmsg);
596 return;
597 }
598
599 if (t->full_packet_received)
600 recvmsg->first_segment = true;
601
602 if (le32_to_cpu(data_transfer->remaining_data_length))
603 t->full_packet_received = false;
604 else
605 t->full_packet_received = true;
606
607 enqueue_reassembly(t, recvmsg, (int)data_length);
608 wake_up_interruptible(&t->wait_reassembly_queue);
609
610 spin_lock(&t->receive_credit_lock);
611 receive_credits = --(t->recv_credits);
612 avail_recvmsg_count = t->count_avail_recvmsg;
613 spin_unlock(&t->receive_credit_lock);
614 } else {
615 put_empty_recvmsg(t, recvmsg);
616
617 spin_lock(&t->receive_credit_lock);
618 receive_credits = --(t->recv_credits);
619 avail_recvmsg_count = ++(t->count_avail_recvmsg);
620 spin_unlock(&t->receive_credit_lock);
621 }
622
623 t->recv_credit_target =
624 le16_to_cpu(data_transfer->credits_requested);
625 atomic_add(le16_to_cpu(data_transfer->credits_granted),
626 &t->send_credits);
627
628 if (le16_to_cpu(data_transfer->flags) &
629 SMB_DIRECT_RESPONSE_REQUESTED)
630 queue_work(smb_direct_wq, &t->send_immediate_work);
631
632 if (atomic_read(&t->send_credits) > 0)
633 wake_up_interruptible(&t->wait_send_credits);
634
635 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
636 mod_delayed_work(smb_direct_wq,
637 &t->post_recv_credits_work, 0);
638 break;
639 }
640 default:
641 break;
642 }
643 }
644
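/* Map a receive buffer for DMA and post it to the receive queue. */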
645 static int smb_direct_post_recv(struct smb_direct_transport *t,
646 struct smb_direct_recvmsg *recvmsg)
647 {
648 struct ib_recv_wr wr;
649 int ret;
650
651 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
652 recvmsg->packet, t->max_recv_size,
653 DMA_FROM_DEVICE);
654 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
655 if (ret)
656 return ret;
657 recvmsg->sge.length = t->max_recv_size;
658 recvmsg->sge.lkey = t->pd->local_dma_lkey;
659 recvmsg->cqe.done = recv_done;
660
661 wr.wr_cqe = &recvmsg->cqe;
662 wr.next = NULL;
663 wr.sg_list = &recvmsg->sge;
664 wr.num_sge = 1;
665
666 ret = ib_post_recv(t->qp, &wr, NULL);
667 if (ret) {
668 pr_err("Can't post recv: %d\n", ret);
669 ib_dma_unmap_single(t->cm_id->device,
670 recvmsg->sge.addr, recvmsg->sge.length,
671 DMA_FROM_DEVICE);
672 smb_direct_disconnect_rdma_connection(t);
673 return ret;
674 }
675 return ret;
676 }
677
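/*
 * Copy up to @size bytes of reassembled payload into @buf. A 4-byte read
 * at the start of a message returns a synthesized RFC1002 length so the
 * upper layer can size the full packet.
 */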
678 static int smb_direct_read(struct ksmbd_transport *t, char *buf,
679 unsigned int size, int unused)
680 {
681 struct smb_direct_recvmsg *recvmsg;
682 struct smb_direct_data_transfer *data_transfer;
683 int to_copy, to_read, data_read, offset;
684 u32 data_length, remaining_data_length, data_offset;
685 int rc;
686 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
687
688 again:
689 if (st->status != SMB_DIRECT_CS_CONNECTED) {
690 pr_err("disconnected\n");
691 return -ENOTCONN;
692 }
693
694 /*
695 * No need to hold the reassembly queue lock all the time as we are
696 * the only one reading from the front of the queue. The transport
697 * may add more entries to the back of the queue at the same time
698 */
699 if (st->reassembly_data_length >= size) {
700 int queue_length;
701 int queue_removed = 0;
702
703 /*
704 * Need to make sure reassembly_data_length is read before
705 * reading reassembly_queue_length and calling
706 * get_first_reassembly. This call is lock free
707 * as we never read at the end of the queue, which is being
708 * updated in softirq context as more data is received
709 */
710 virt_rmb();
711 queue_length = st->reassembly_queue_length;
712 data_read = 0;
713 to_read = size;
714 offset = st->first_entry_offset;
715 while (data_read < size) {
716 recvmsg = get_first_reassembly(st);
717 data_transfer = smb_direct_recvmsg_payload(recvmsg);
718 data_length = le32_to_cpu(data_transfer->data_length);
719 remaining_data_length =
720 le32_to_cpu(data_transfer->remaining_data_length);
721 data_offset = le32_to_cpu(data_transfer->data_offset);
722
723 /*
724 * The upper layer expects RFC1002 length at the
725 * beginning of the payload. Return it to indicate
726 * the total length of the packet. This minimizes the
727 * change to the upper-layer packet processing logic. This
728 * will eventually be removed when an intermediate
729 * transport layer is added
730 */
731 if (recvmsg->first_segment && size == 4) {
732 unsigned int rfc1002_len =
733 data_length + remaining_data_length;
734 *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
735 data_read = 4;
736 recvmsg->first_segment = false;
737 ksmbd_debug(RDMA,
738 "returning rfc1002 length %d\n",
739 rfc1002_len);
740 goto read_rfc1002_done;
741 }
742
743 to_copy = min_t(int, data_length - offset, to_read);
744 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
745 to_copy);
746
747 /* move on to the next buffer? */
748 if (to_copy == data_length - offset) {
749 queue_length--;
750 /*
751 * No need to lock if we are not at the
752 * end of the queue
753 */
754 if (queue_length) {
755 list_del(&recvmsg->list);
756 } else {
757 spin_lock_irq(&st->reassembly_queue_lock);
758 list_del(&recvmsg->list);
759 spin_unlock_irq(&st->reassembly_queue_lock);
760 }
761 queue_removed++;
762 put_recvmsg(st, recvmsg);
763 offset = 0;
764 } else {
765 offset += to_copy;
766 }
767
768 to_read -= to_copy;
769 data_read += to_copy;
770 }
771
772 spin_lock_irq(&st->reassembly_queue_lock);
773 st->reassembly_data_length -= data_read;
774 st->reassembly_queue_length -= queue_removed;
775 spin_unlock_irq(&st->reassembly_queue_lock);
776
777 spin_lock(&st->receive_credit_lock);
778 st->count_avail_recvmsg += queue_removed;
779 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
780 spin_unlock(&st->receive_credit_lock);
781 mod_delayed_work(smb_direct_wq,
782 &st->post_recv_credits_work, 0);
783 } else {
784 spin_unlock(&st->receive_credit_lock);
785 }
786
787 st->first_entry_offset = offset;
788 ksmbd_debug(RDMA,
789 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
790 data_read, st->reassembly_data_length,
791 st->first_entry_offset);
792 read_rfc1002_done:
793 return data_read;
794 }
795
796 ksmbd_debug(RDMA, "wait_event on more data\n");
797 rc = wait_event_interruptible(st->wait_reassembly_queue,
798 st->reassembly_data_length >= size ||
799 st->status != SMB_DIRECT_CS_CONNECTED);
800 if (rc)
801 return -EINTR;
802
803 goto again;
804 }
805
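/*
 * Work item that reposts receive buffers until recv_credit_target is
 * reached, then schedules an immediate send so the newly granted credits
 * are advertised to the peer.
 */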
806 static void smb_direct_post_recv_credits(struct work_struct *work)
807 {
808 struct smb_direct_transport *t = container_of(work,
809 struct smb_direct_transport, post_recv_credits_work.work);
810 struct smb_direct_recvmsg *recvmsg;
811 int receive_credits, credits = 0;
812 int ret;
813 int use_free = 1;
814
815 spin_lock(&t->receive_credit_lock);
816 receive_credits = t->recv_credits;
817 spin_unlock(&t->receive_credit_lock);
818
819 if (receive_credits < t->recv_credit_target) {
820 while (true) {
821 if (use_free)
822 recvmsg = get_free_recvmsg(t);
823 else
824 recvmsg = get_empty_recvmsg(t);
825 if (!recvmsg) {
826 if (use_free) {
827 use_free = 0;
828 continue;
829 } else {
830 break;
831 }
832 }
833
834 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
835 recvmsg->first_segment = false;
836
837 ret = smb_direct_post_recv(t, recvmsg);
838 if (ret) {
839 pr_err("Can't post recv: %d\n", ret);
840 put_recvmsg(t, recvmsg);
841 break;
842 }
843 credits++;
844 }
845 }
846
847 spin_lock(&t->receive_credit_lock);
848 t->recv_credits += credits;
849 t->count_avail_recvmsg -= credits;
850 spin_unlock(&t->receive_credit_lock);
851
852 spin_lock(&t->lock_new_recv_credits);
853 t->new_recv_credits += credits;
854 spin_unlock(&t->lock_new_recv_credits);
855
856 if (credits)
857 queue_work(smb_direct_wq, &t->send_immediate_work);
858 }
859
860 static void send_done(struct ib_cq *cq, struct ib_wc *wc)
861 {
862 struct smb_direct_sendmsg *sendmsg, *sibling;
863 struct smb_direct_transport *t;
864 struct list_head *pos, *prev, *end;
865
866 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
867 t = sendmsg->transport;
868
869 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
870 ib_wc_status_msg(wc->status), wc->status,
871 wc->opcode);
872
873 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
874 pr_err("Send error. status='%s (%d)', opcode=%d\n",
875 ib_wc_status_msg(wc->status), wc->status,
876 wc->opcode);
877 smb_direct_disconnect_rdma_connection(t);
878 }
879
880 if (atomic_dec_and_test(&t->send_pending))
881 wake_up(&t->wait_send_pending);
882
883 /* iterate and free the list of messages in reverse. the list's head
884 * is invalid.
885 */
886 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
887 prev != end; pos = prev, prev = prev->prev) {
888 sibling = container_of(pos, struct smb_direct_sendmsg, list);
889 smb_direct_free_sendmsg(t, sibling);
890 }
891
892 sibling = container_of(pos, struct smb_direct_sendmsg, list);
893 smb_direct_free_sendmsg(t, sibling);
894 }
895
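/*
 * Take the receive credits accumulated since the last send; the caller
 * grants them to the peer in the next packet header.
 */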
896 static int manage_credits_prior_sending(struct smb_direct_transport *t)
897 {
898 int new_credits;
899
900 spin_lock(&t->lock_new_recv_credits);
901 new_credits = t->new_recv_credits;
902 t->new_recv_credits = 0;
903 spin_unlock(&t->lock_new_recv_credits);
904
905 return new_credits;
906 }
907
908 static int smb_direct_post_send(struct smb_direct_transport *t,
909 struct ib_send_wr *wr)
910 {
911 int ret;
912
913 atomic_inc(&t->send_pending);
914 ret = ib_post_send(t->qp, wr, NULL);
915 if (ret) {
916 pr_err("failed to post send: %d\n", ret);
917 if (atomic_dec_and_test(&t->send_pending))
918 wake_up(&t->wait_send_pending);
919 smb_direct_disconnect_rdma_connection(t);
920 }
921 return ret;
922 }
923
924 static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
925 struct smb_direct_send_ctx *send_ctx,
926 bool need_invalidate_rkey,
927 unsigned int remote_key)
928 {
929 INIT_LIST_HEAD(&send_ctx->msg_list);
930 send_ctx->wr_cnt = 0;
931 send_ctx->need_invalidate_rkey = need_invalidate_rkey;
932 send_ctx->remote_key = remote_key;
933 }
934
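/*
 * Post the chained work requests accumulated in @send_ctx as one batch.
 * Only the last WR is signaled; its completion frees the whole chain.
 */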
935 static int smb_direct_flush_send_list(struct smb_direct_transport *t,
936 struct smb_direct_send_ctx *send_ctx,
937 bool is_last)
938 {
939 struct smb_direct_sendmsg *first, *last;
940 int ret;
941
942 if (list_empty(&send_ctx->msg_list))
943 return 0;
944
945 first = list_first_entry(&send_ctx->msg_list,
946 struct smb_direct_sendmsg,
947 list);
948 last = list_last_entry(&send_ctx->msg_list,
949 struct smb_direct_sendmsg,
950 list);
951
952 last->wr.send_flags = IB_SEND_SIGNALED;
953 last->wr.wr_cqe = &last->cqe;
954 if (is_last && send_ctx->need_invalidate_rkey) {
955 last->wr.opcode = IB_WR_SEND_WITH_INV;
956 last->wr.ex.invalidate_rkey = send_ctx->remote_key;
957 }
958
959 ret = smb_direct_post_send(t, &first->wr);
960 if (!ret) {
961 smb_direct_send_ctx_init(t, send_ctx,
962 send_ctx->need_invalidate_rkey,
963 send_ctx->remote_key);
964 } else {
965 atomic_add(send_ctx->wr_cnt, &t->send_credits);
966 wake_up(&t->wait_send_credits);
967 list_for_each_entry_safe(first, last, &send_ctx->msg_list,
968 list) {
969 smb_direct_free_sendmsg(t, first);
970 }
971 }
972 return ret;
973 }
974
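/*
 * Atomically reserve @needed credits, sleeping until enough are available
 * or the connection drops.
 */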
975 static int wait_for_credits(struct smb_direct_transport *t,
976 wait_queue_head_t *waitq, atomic_t *total_credits,
977 int needed)
978 {
979 int ret;
980
981 do {
982 if (atomic_sub_return(needed, total_credits) >= 0)
983 return 0;
984
985 atomic_add(needed, total_credits);
986 ret = wait_event_interruptible(*waitq,
987 atomic_read(total_credits) >= needed ||
988 t->status != SMB_DIRECT_CS_CONNECTED);
989
990 if (t->status != SMB_DIRECT_CS_CONNECTED)
991 return -ENOTCONN;
992 else if (ret < 0)
993 return ret;
994 } while (true);
995 }
996
997 static int wait_for_send_credits(struct smb_direct_transport *t,
998 struct smb_direct_send_ctx *send_ctx)
999 {
1000 int ret;
1001
1002 if (send_ctx &&
1003 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
1004 ret = smb_direct_flush_send_list(t, send_ctx, false);
1005 if (ret)
1006 return ret;
1007 }
1008
1009 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1);
1010 }
1011
1012 static int wait_for_rw_credits(struct smb_direct_transport *t, int credits)
1013 {
1014 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits);
1015 }
1016
1017 static int calc_rw_credits(struct smb_direct_transport *t,
1018 char *buf, unsigned int len)
1019 {
1020 return DIV_ROUND_UP(get_buf_page_count(buf, len),
1021 t->pages_per_rw_credit);
1022 }
1023
1024 static int smb_direct_create_header(struct smb_direct_transport *t,
1025 int size, int remaining_data_length,
1026 struct smb_direct_sendmsg **sendmsg_out)
1027 {
1028 struct smb_direct_sendmsg *sendmsg;
1029 struct smb_direct_data_transfer *packet;
1030 int header_length;
1031 int ret;
1032
1033 sendmsg = smb_direct_alloc_sendmsg(t);
1034 if (IS_ERR(sendmsg))
1035 return PTR_ERR(sendmsg);
1036
1037 /* Fill in the packet header */
1038 packet = (struct smb_direct_data_transfer *)sendmsg->packet;
1039 packet->credits_requested = cpu_to_le16(t->send_credit_target);
1040 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1041
1042 packet->flags = 0;
1043 packet->reserved = 0;
1044 if (!size)
1045 packet->data_offset = 0;
1046 else
1047 packet->data_offset = cpu_to_le32(24);
1048 packet->data_length = cpu_to_le32(size);
1049 packet->remaining_data_length = cpu_to_le32(remaining_data_length);
1050 packet->padding = 0;
1051
1052 ksmbd_debug(RDMA,
1053 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
1054 le16_to_cpu(packet->credits_requested),
1055 le16_to_cpu(packet->credits_granted),
1056 le32_to_cpu(packet->data_offset),
1057 le32_to_cpu(packet->data_length),
1058 le32_to_cpu(packet->remaining_data_length));
1059
1060 /* Map the packet to DMA */
1061 header_length = sizeof(struct smb_direct_data_transfer);
1062 /* If this is a packet without payload, don't send padding */
1063 if (!size)
1064 header_length =
1065 offsetof(struct smb_direct_data_transfer, padding);
1066
1067 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1068 (void *)packet,
1069 header_length,
1070 DMA_TO_DEVICE);
1071 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1072 if (ret) {
1073 smb_direct_free_sendmsg(t, sendmsg);
1074 return ret;
1075 }
1076
1077 sendmsg->num_sge = 1;
1078 sendmsg->sge[0].length = header_length;
1079 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1080
1081 *sendmsg_out = sendmsg;
1082 return 0;
1083 }
1084
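/*
 * Build a scatterlist covering @size bytes at @buf, handling both
 * vmalloc'ed and kmapped buffers. Returns the number of entries used.
 */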
1085 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
1086 {
1087 bool high = is_vmalloc_addr(buf);
1088 struct page *page;
1089 int offset, len;
1090 int i = 0;
1091
1092 if (size <= 0 || nentries < get_buf_page_count(buf, size))
1093 return -EINVAL;
1094
1095 offset = offset_in_page(buf);
1096 buf -= offset;
1097 while (size > 0) {
1098 len = min_t(int, PAGE_SIZE - offset, size);
1099 if (high)
1100 page = vmalloc_to_page(buf);
1101 else
1102 page = kmap_to_page(buf);
1103
1104 if (!sg_list)
1105 return -EINVAL;
1106 sg_set_page(sg_list, page, len, offset);
1107 sg_list = sg_next(sg_list);
1108
1109 buf += PAGE_SIZE;
1110 size -= len;
1111 offset = 0;
1112 i++;
1113 }
1114 return i;
1115 }
1116
1117 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
1118 struct scatterlist *sg_list, int nentries,
1119 enum dma_data_direction dir)
1120 {
1121 int npages;
1122
1123 npages = get_sg_list(buf, size, sg_list, nentries);
1124 if (npages < 0)
1125 return -EINVAL;
1126 return ib_dma_map_sg(device, sg_list, npages, dir);
1127 }
1128
1129 static int post_sendmsg(struct smb_direct_transport *t,
1130 struct smb_direct_send_ctx *send_ctx,
1131 struct smb_direct_sendmsg *msg)
1132 {
1133 int i;
1134
1135 for (i = 0; i < msg->num_sge; i++)
1136 ib_dma_sync_single_for_device(t->cm_id->device,
1137 msg->sge[i].addr, msg->sge[i].length,
1138 DMA_TO_DEVICE);
1139
1140 msg->cqe.done = send_done;
1141 msg->wr.opcode = IB_WR_SEND;
1142 msg->wr.sg_list = &msg->sge[0];
1143 msg->wr.num_sge = msg->num_sge;
1144 msg->wr.next = NULL;
1145
1146 if (send_ctx) {
1147 msg->wr.wr_cqe = NULL;
1148 msg->wr.send_flags = 0;
1149 if (!list_empty(&send_ctx->msg_list)) {
1150 struct smb_direct_sendmsg *last;
1151
1152 last = list_last_entry(&send_ctx->msg_list,
1153 struct smb_direct_sendmsg,
1154 list);
1155 last->wr.next = &msg->wr;
1156 }
1157 list_add_tail(&msg->list, &send_ctx->msg_list);
1158 send_ctx->wr_cnt++;
1159 return 0;
1160 }
1161
1162 msg->wr.wr_cqe = &msg->cqe;
1163 msg->wr.send_flags = IB_SEND_SIGNALED;
1164 return smb_direct_post_send(t, &msg->wr);
1165 }
1166
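/*
 * Build an SMB_DIRECT data-transfer message: create the header, map the
 * payload iovecs into SGEs, then either chain the message onto @send_ctx
 * or post it immediately.
 */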
1167 static int smb_direct_post_send_data(struct smb_direct_transport *t,
1168 struct smb_direct_send_ctx *send_ctx,
1169 struct kvec *iov, int niov,
1170 int remaining_data_length)
1171 {
1172 int i, j, ret;
1173 struct smb_direct_sendmsg *msg;
1174 int data_length;
1175 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
1176
1177 ret = wait_for_send_credits(t, send_ctx);
1178 if (ret)
1179 return ret;
1180
1181 data_length = 0;
1182 for (i = 0; i < niov; i++)
1183 data_length += iov[i].iov_len;
1184
1185 ret = smb_direct_create_header(t, data_length, remaining_data_length,
1186 &msg);
1187 if (ret) {
1188 atomic_inc(&t->send_credits);
1189 return ret;
1190 }
1191
1192 for (i = 0; i < niov; i++) {
1193 struct ib_sge *sge;
1194 int sg_cnt;
1195
1196 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
1197 sg_cnt = get_mapped_sg_list(t->cm_id->device,
1198 iov[i].iov_base, iov[i].iov_len,
1199 sg, SMB_DIRECT_MAX_SEND_SGES - 1,
1200 DMA_TO_DEVICE);
1201 if (sg_cnt <= 0) {
1202 pr_err("failed to map buffer\n");
1203 ret = -ENOMEM;
1204 goto err;
1205 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
1206 pr_err("buffer not fitted into sges\n");
1207 ret = -E2BIG;
1208 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
1209 DMA_TO_DEVICE);
1210 goto err;
1211 }
1212
1213 for (j = 0; j < sg_cnt; j++) {
1214 sge = &msg->sge[msg->num_sge];
1215 sge->addr = sg_dma_address(&sg[j]);
1216 sge->length = sg_dma_len(&sg[j]);
1217 sge->lkey = t->pd->local_dma_lkey;
1218 msg->num_sge++;
1219 }
1220 }
1221
1222 ret = post_sendmsg(t, send_ctx, msg);
1223 if (ret)
1224 goto err;
1225 return 0;
1226 err:
1227 smb_direct_free_sendmsg(t, msg);
1228 atomic_inc(&t->send_credits);
1229 return ret;
1230 }
1231
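/*
 * Transmit an SMB2 response: split the iovec array into SMB_DIRECT
 * messages no larger than max_send_size and wait until every posted
 * send has completed.
 */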
1232 static int smb_direct_writev(struct ksmbd_transport *t,
1233 struct kvec *iov, int niovs, int buflen,
1234 bool need_invalidate, unsigned int remote_key)
1235 {
1236 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1237 int remaining_data_length;
1238 int start, i, j;
1239 int max_iov_size = st->max_send_size -
1240 sizeof(struct smb_direct_data_transfer);
1241 int ret;
1242 struct kvec vec;
1243 struct smb_direct_send_ctx send_ctx;
1244
1245 if (st->status != SMB_DIRECT_CS_CONNECTED)
1246 return -ENOTCONN;
1247
1248 //FIXME: skip RFC1002 header..
1249 buflen -= 4;
1250
1251 remaining_data_length = buflen;
1252 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
1253
1254 smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
1255 start = i = 1;
1256 buflen = 0;
1257 while (true) {
1258 buflen += iov[i].iov_len;
1259 if (buflen > max_iov_size) {
1260 if (i > start) {
1261 remaining_data_length -=
1262 (buflen - iov[i].iov_len);
1263 ret = smb_direct_post_send_data(st, &send_ctx,
1264 &iov[start], i - start,
1265 remaining_data_length);
1266 if (ret)
1267 goto done;
1268 } else {
1269 /* iov[start] is too big, break it */
1270 int nvec = (buflen + max_iov_size - 1) /
1271 max_iov_size;
1272
1273 for (j = 0; j < nvec; j++) {
1274 vec.iov_base =
1275 (char *)iov[start].iov_base +
1276 j * max_iov_size;
1277 vec.iov_len =
1278 min_t(int, max_iov_size,
1279 buflen - max_iov_size * j);
1280 remaining_data_length -= vec.iov_len;
1281 ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
1282 remaining_data_length);
1283 if (ret)
1284 goto done;
1285 }
1286 i++;
1287 if (i == niovs)
1288 break;
1289 }
1290 start = i;
1291 buflen = 0;
1292 } else {
1293 i++;
1294 if (i == niovs) {
1295 /* send out all remaining vecs */
1296 remaining_data_length -= buflen;
1297 ret = smb_direct_post_send_data(st, &send_ctx,
1298 &iov[start], i - start,
1299 remaining_data_length);
1300 if (ret)
1301 goto done;
1302 break;
1303 }
1304 }
1305 }
1306
1307 done:
1308 ret = smb_direct_flush_send_list(st, &send_ctx, true);
1309
1310 /*
1311 * As an optimization, we don't wait for individual I/O to finish
1312 * before sending the next one.
1313 * Send them all and wait for the pending send count to reach 0,
1314 * which means all the sends have completed and we are good to return
1315 */
1316
1317 wait_event(st->wait_send_pending,
1318 atomic_read(&st->send_pending) == 0);
1319 return ret;
1320 }
1321
1322 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
1323 struct smb_direct_rdma_rw_msg *msg,
1324 enum dma_data_direction dir)
1325 {
1326 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
1327 msg->sgt.sgl, msg->sgt.nents, dir);
1328 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1329 kfree(msg);
1330 }
1331
1332 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
1333 enum dma_data_direction dir)
1334 {
1335 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
1336 struct smb_direct_rdma_rw_msg, cqe);
1337 struct smb_direct_transport *t = msg->t;
1338
1339 if (wc->status != IB_WC_SUCCESS) {
1340 msg->status = -EIO;
1341 pr_err("read/write error. opcode = %d, status = %s(%d)\n",
1342 wc->opcode, ib_wc_status_msg(wc->status), wc->status);
1343 if (wc->status != IB_WC_WR_FLUSH_ERR)
1344 smb_direct_disconnect_rdma_connection(t);
1345 }
1346
1347 complete(msg->completion);
1348 }
1349
1350 static void read_done(struct ib_cq *cq, struct ib_wc *wc)
1351 {
1352 read_write_done(cq, wc, DMA_FROM_DEVICE);
1353 }
1354
1355 static void write_done(struct ib_cq *cq, struct ib_wc *wc)
1356 {
1357 read_write_done(cq, wc, DMA_TO_DEVICE);
1358 }
1359
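/*
 * Perform the RDMA read/write described by the client's buffer
 * descriptors: one rdma_rw_ctx per descriptor, with all work requests
 * chained and posted in a single ib_post_send(), then wait for
 * completion before tearing the contexts down.
 */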
1360 static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
1361 void *buf, int buf_len,
1362 struct smb2_buffer_desc_v1 *desc,
1363 unsigned int desc_len,
1364 bool is_read)
1365 {
1366 struct smb_direct_rdma_rw_msg *msg, *next_msg;
1367 int i, ret;
1368 DECLARE_COMPLETION_ONSTACK(completion);
1369 struct ib_send_wr *first_wr;
1370 LIST_HEAD(msg_list);
1371 char *desc_buf;
1372 int credits_needed;
1373 unsigned int desc_buf_len, desc_num = 0;
1374
1375 if (t->status != SMB_DIRECT_CS_CONNECTED)
1376 return -ENOTCONN;
1377
1378 if (buf_len > t->max_rdma_rw_size)
1379 return -EINVAL;
1380
1381 /* calculate needed credits */
1382 credits_needed = 0;
1383 desc_buf = buf;
1384 for (i = 0; i < desc_len / sizeof(*desc); i++) {
1385 if (!buf_len)
1386 break;
1387
1388 desc_buf_len = le32_to_cpu(desc[i].length);
1389 if (!desc_buf_len)
1390 return -EINVAL;
1391
1392 if (desc_buf_len > buf_len) {
1393 desc_buf_len = buf_len;
1394 desc[i].length = cpu_to_le32(desc_buf_len);
1395 buf_len = 0;
1396 }
1397
1398 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len);
1399 desc_buf += desc_buf_len;
1400 buf_len -= desc_buf_len;
1401 desc_num++;
1402 }
1403
1404 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
1405 str_read_write(is_read), buf_len, credits_needed);
1406
1407 ret = wait_for_rw_credits(t, credits_needed);
1408 if (ret < 0)
1409 return ret;
1410
1411 /* build rdma_rw_ctx for each descriptor */
1412 desc_buf = buf;
1413 for (i = 0; i < desc_num; i++) {
1414 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE),
1415 KSMBD_DEFAULT_GFP);
1416 if (!msg) {
1417 ret = -ENOMEM;
1418 goto out;
1419 }
1420
1421 desc_buf_len = le32_to_cpu(desc[i].length);
1422
1423 msg->t = t;
1424 msg->cqe.done = is_read ? read_done : write_done;
1425 msg->completion = &completion;
1426
1427 msg->sgt.sgl = &msg->sg_list[0];
1428 ret = sg_alloc_table_chained(&msg->sgt,
1429 get_buf_page_count(desc_buf, desc_buf_len),
1430 msg->sg_list, SG_CHUNK_SIZE);
1431 if (ret) {
1432 kfree(msg);
1433 ret = -ENOMEM;
1434 goto out;
1435 }
1436
1437 ret = get_sg_list(desc_buf, desc_buf_len,
1438 msg->sgt.sgl, msg->sgt.orig_nents);
1439 if (ret < 0) {
1440 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1441 kfree(msg);
1442 goto out;
1443 }
1444
1445 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
1446 msg->sgt.sgl,
1447 get_buf_page_count(desc_buf, desc_buf_len),
1448 0,
1449 le64_to_cpu(desc[i].offset),
1450 le32_to_cpu(desc[i].token),
1451 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1452 if (ret < 0) {
1453 pr_err("failed to init rdma_rw_ctx: %d\n", ret);
1454 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1455 kfree(msg);
1456 goto out;
1457 }
1458
1459 list_add_tail(&msg->list, &msg_list);
1460 desc_buf += desc_buf_len;
1461 }
1462
1463 /* concatenate work requests of rdma_rw_ctxs */
1464 first_wr = NULL;
1465 list_for_each_entry_reverse(msg, &msg_list, list) {
1466 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
1467 &msg->cqe, first_wr);
1468 }
1469
1470 ret = ib_post_send(t->qp, first_wr, NULL);
1471 if (ret) {
1472 pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
1473 goto out;
1474 }
1475
1476 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list);
1477 wait_for_completion(&completion);
1478 ret = msg->status;
1479 out:
1480 list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
1481 list_del(&msg->list);
1482 smb_direct_free_rdma_rw_msg(t, msg,
1483 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1484 }
1485 atomic_add(credits_needed, &t->rw_credits);
1486 wake_up(&t->wait_rw_credits);
1487 return ret;
1488 }
1489
1490 static int smb_direct_rdma_write(struct ksmbd_transport *t,
1491 void *buf, unsigned int buflen,
1492 struct smb2_buffer_desc_v1 *desc,
1493 unsigned int desc_len)
1494 {
1495 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1496 desc, desc_len, false);
1497 }
1498
1499 static int smb_direct_rdma_read(struct ksmbd_transport *t,
1500 void *buf, unsigned int buflen,
1501 struct smb2_buffer_desc_v1 *desc,
1502 unsigned int desc_len)
1503 {
1504 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1505 desc, desc_len, true);
1506 }
1507
1508 static void smb_direct_disconnect(struct ksmbd_transport *t)
1509 {
1510 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1511
1512 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
1513
1514 smb_direct_disconnect_rdma_work(&st->disconnect_work);
1515 wait_event_interruptible(st->wait_status,
1516 st->status == SMB_DIRECT_CS_DISCONNECTED);
1517 free_transport(st);
1518 }
1519
1520 static void smb_direct_shutdown(struct ksmbd_transport *t)
1521 {
1522 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1523
1524 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
1525
1526 smb_direct_disconnect_rdma_work(&st->disconnect_work);
1527 }
1528
1529 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
1530 struct rdma_cm_event *event)
1531 {
1532 struct smb_direct_transport *t = cm_id->context;
1533
1534 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
1535 cm_id, rdma_event_msg(event->event), event->event);
1536
1537 switch (event->event) {
1538 case RDMA_CM_EVENT_ESTABLISHED: {
1539 t->status = SMB_DIRECT_CS_CONNECTED;
1540 wake_up_interruptible(&t->wait_status);
1541 break;
1542 }
1543 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1544 case RDMA_CM_EVENT_DISCONNECTED: {
1545 ib_drain_qp(t->qp);
1546
1547 t->status = SMB_DIRECT_CS_DISCONNECTED;
1548 wake_up_interruptible(&t->wait_status);
1549 wake_up_interruptible(&t->wait_reassembly_queue);
1550 wake_up(&t->wait_send_credits);
1551 break;
1552 }
1553 case RDMA_CM_EVENT_CONNECT_ERROR: {
1554 t->status = SMB_DIRECT_CS_DISCONNECTED;
1555 wake_up_interruptible(&t->wait_status);
1556 break;
1557 }
1558 default:
1559 pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
1560 cm_id, rdma_event_msg(event->event),
1561 event->event);
1562 break;
1563 }
1564 return 0;
1565 }
1566
1567 static void smb_direct_qpair_handler(struct ib_event *event, void *context)
1568 {
1569 struct smb_direct_transport *t = context;
1570
1571 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
1572 t->cm_id, ib_event_msg(event->event), event->event);
1573
1574 switch (event->event) {
1575 case IB_EVENT_CQ_ERR:
1576 case IB_EVENT_QP_FATAL:
1577 smb_direct_disconnect_rdma_connection(t);
1578 break;
1579 default:
1580 break;
1581 }
1582 }
1583
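/*
 * Send the SMB_DIRECT negotiate response. On failure only the version
 * fields and a STATUS_NOT_SUPPORTED status are returned; otherwise the
 * negotiated parameters and the initial credit grant are advertised.
 */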
1584 static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
1585 int failed)
1586 {
1587 struct smb_direct_sendmsg *sendmsg;
1588 struct smb_direct_negotiate_resp *resp;
1589 int ret;
1590
1591 sendmsg = smb_direct_alloc_sendmsg(t);
1592 if (IS_ERR(sendmsg))
1593 return -ENOMEM;
1594
1595 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
1596 if (failed) {
1597 memset(resp, 0, sizeof(*resp));
1598 resp->min_version = cpu_to_le16(0x0100);
1599 resp->max_version = cpu_to_le16(0x0100);
1600 resp->status = STATUS_NOT_SUPPORTED;
1601 } else {
1602 resp->status = STATUS_SUCCESS;
1603 resp->min_version = SMB_DIRECT_VERSION_LE;
1604 resp->max_version = SMB_DIRECT_VERSION_LE;
1605 resp->negotiated_version = SMB_DIRECT_VERSION_LE;
1606 resp->reserved = 0;
1607 resp->credits_requested =
1608 cpu_to_le16(t->send_credit_target);
1609 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1610 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
1611 resp->preferred_send_size = cpu_to_le32(t->max_send_size);
1612 resp->max_receive_size = cpu_to_le32(t->max_recv_size);
1613 resp->max_fragmented_size =
1614 cpu_to_le32(t->max_fragmented_recv_size);
1615 }
1616
1617 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1618 (void *)resp, sizeof(*resp),
1619 DMA_TO_DEVICE);
1620 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1621 if (ret) {
1622 smb_direct_free_sendmsg(t, sendmsg);
1623 return ret;
1624 }
1625
1626 sendmsg->num_sge = 1;
1627 sendmsg->sge[0].length = sizeof(*resp);
1628 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1629
1630 ret = post_sendmsg(t, NULL, sendmsg);
1631 if (ret) {
1632 smb_direct_free_sendmsg(t, sendmsg);
1633 return ret;
1634 }
1635
1636 wait_event(t->wait_send_pending,
1637 atomic_read(&t->send_pending) == 0);
1638 return 0;
1639 }
1640
1641 static int smb_direct_accept_client(struct smb_direct_transport *t)
1642 {
1643 struct rdma_conn_param conn_param;
1644 struct ib_port_immutable port_immutable;
1645 u32 ird_ord_hdr[2];
1646 int ret;
1647
1648 memset(&conn_param, 0, sizeof(conn_param));
1649 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
1650 SMB_DIRECT_CM_INITIATOR_DEPTH);
1651 conn_param.responder_resources = 0;
1652
1653 t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
1654 t->cm_id->port_num,
1655 &port_immutable);
1656 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
1657 ird_ord_hdr[0] = conn_param.responder_resources;
1658 ird_ord_hdr[1] = 1;
1659 conn_param.private_data = ird_ord_hdr;
1660 conn_param.private_data_len = sizeof(ird_ord_hdr);
1661 } else {
1662 conn_param.private_data = NULL;
1663 conn_param.private_data_len = 0;
1664 }
1665 conn_param.retry_count = SMB_DIRECT_CM_RETRY;
1666 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
1667 conn_param.flow_control = 0;
1668
1669 ret = rdma_accept(t->cm_id, &conn_param);
1670 if (ret) {
1671 pr_err("error at rdma_accept: %d\n", ret);
1672 return ret;
1673 }
1674 return 0;
1675 }
1676
1677 static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
1678 {
1679 int ret;
1680 struct smb_direct_recvmsg *recvmsg;
1681
1682 recvmsg = get_free_recvmsg(t);
1683 if (!recvmsg)
1684 return -ENOMEM;
1685 recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
1686
1687 ret = smb_direct_post_recv(t, recvmsg);
1688 if (ret) {
1689 pr_err("Can't post recv: %d\n", ret);
1690 goto out_err;
1691 }
1692
1693 t->negotiation_requested = false;
1694 ret = smb_direct_accept_client(t);
1695 if (ret) {
1696 pr_err("Can't accept client\n");
1697 goto out_err;
1698 }
1699
1700 smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
1701 return 0;
1702 out_err:
1703 put_recvmsg(t, recvmsg);
1704 return ret;
1705 }
1706
1707 static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
1708 {
1709 return min_t(unsigned int,
1710 t->cm_id->device->attrs.max_fast_reg_page_list_len,
1711 256);
1712 }
1713
1714 static int smb_direct_init_params(struct smb_direct_transport *t,
1715 struct ib_qp_cap *cap)
1716 {
1717 struct ib_device *device = t->cm_id->device;
1718 int max_send_sges, max_rw_wrs, max_send_wrs;
1719 unsigned int max_sge_per_wr, wrs_per_credit;
1720
1721 /* We need 3 extra SGEs because the SMB_DIRECT header, the SMB2 header,
1722 * and the SMB2 response may each be mapped as separate SGEs.
1723 */
1724 t->max_send_size = smb_direct_max_send_size;
1725 max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3;
1726 if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
1727 pr_err("max_send_size %d is too large\n", t->max_send_size);
1728 return -EINVAL;
1729 }
1730
1731 /* Calculate the number of work requests for RDMA R/W.
1732 * The maximum number of pages which can be registered
1733 * with one Memory region can be transferred with one
1734 * R/W credit. And at least 4 work requests for each credit
1735 * are needed for MR registration, RDMA R/W, local & remote
1736 * MR invalidation.
1737 */
1738 t->max_rdma_rw_size = smb_direct_max_read_write_size;
1739 t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
1740 t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
1741 (t->pages_per_rw_credit - 1) *
1742 PAGE_SIZE);
1743
1744 max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
1745 device->attrs.max_sge_rd);
1746 max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
1747 max_send_sges);
1748 wrs_per_credit = max_t(unsigned int, 4,
1749 DIV_ROUND_UP(t->pages_per_rw_credit,
1750 max_sge_per_wr) + 1);
1751 max_rw_wrs = t->max_rw_credits * wrs_per_credit;
1752
1753 max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
1754 if (max_send_wrs > device->attrs.max_cqe ||
1755 max_send_wrs > device->attrs.max_qp_wr) {
1756 pr_err("consider lowering send_credit_target = %d\n",
1757 smb_direct_send_credit_target);
1758 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1759 device->attrs.max_cqe, device->attrs.max_qp_wr);
1760 return -EINVAL;
1761 }
1762
1763 if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
1764 smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
1765 pr_err("consider lowering receive_credit_max = %d\n",
1766 smb_direct_receive_credit_max);
1767 pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
1768 device->attrs.max_cqe, device->attrs.max_qp_wr);
1769 return -EINVAL;
1770 }

	if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
		pr_err("warning: device max_recv_sge = %d too small\n",
		       device->attrs.max_recv_sge);
		return -EINVAL;
	}

	t->recv_credits = 0;
	t->count_avail_recvmsg = 0;

	t->recv_credit_max = smb_direct_receive_credit_max;
	t->recv_credit_target = 10;
	t->new_recv_credits = 0;

	t->send_credit_target = smb_direct_send_credit_target;
	atomic_set(&t->send_credits, 0);
	atomic_set(&t->rw_credits, t->max_rw_credits);

	t->max_send_size = smb_direct_max_send_size;
	t->max_recv_size = smb_direct_max_receive_size;
	t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;

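	/*
	 * Publish the computed limits as QP capabilities: max_send_wr must
	 * cover both the send-credit target and the work requests needed
	 * for RDMA R/W, while max_rdma_ctxs matches the R/W credit count.
	 */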
	cap->max_send_wr = max_send_wrs;
	cap->max_recv_wr = t->recv_credit_max;
	cap->max_send_sge = max_sge_per_wr;
	cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
	cap->max_inline_data = 0;
	cap->max_rdma_ctxs = t->max_rw_credits;
	return 0;
}

static void smb_direct_destroy_pools(struct smb_direct_transport *t)
{
	struct smb_direct_recvmsg *recvmsg;

	while ((recvmsg = get_free_recvmsg(t)))
		mempool_free(recvmsg, t->recvmsg_mempool);
	while ((recvmsg = get_empty_recvmsg(t)))
		mempool_free(recvmsg, t->recvmsg_mempool);

	mempool_destroy(t->recvmsg_mempool);
	t->recvmsg_mempool = NULL;

	kmem_cache_destroy(t->recvmsg_cache);
	t->recvmsg_cache = NULL;

	mempool_destroy(t->sendmsg_mempool);
	t->sendmsg_mempool = NULL;

	kmem_cache_destroy(t->sendmsg_cache);
	t->sendmsg_cache = NULL;
}

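/*
 * Per-connection caches and mempools for message descriptors: sendmsg
 * entries reserve room for an inline negotiate response, recvmsg entries
 * embed a full max_recv_size receive buffer, and recv_credit_max receive
 * descriptors are allocated up front.
 */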
static int smb_direct_create_pools(struct smb_direct_transport *t)
{
	char name[80];
	int i;
	struct smb_direct_recvmsg *recvmsg;

	snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
	t->sendmsg_cache = kmem_cache_create(name,
					     sizeof(struct smb_direct_sendmsg) +
					     sizeof(struct smb_direct_negotiate_resp),
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!t->sendmsg_cache)
		return -ENOMEM;

	t->sendmsg_mempool = mempool_create(t->send_credit_target,
					    mempool_alloc_slab, mempool_free_slab,
					    t->sendmsg_cache);
	if (!t->sendmsg_mempool)
		goto err;

	snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
	t->recvmsg_cache = kmem_cache_create(name,
					     sizeof(struct smb_direct_recvmsg) +
					     t->max_recv_size,
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!t->recvmsg_cache)
		goto err;

	t->recvmsg_mempool =
		mempool_create(t->recv_credit_max, mempool_alloc_slab,
			       mempool_free_slab, t->recvmsg_cache);
	if (!t->recvmsg_mempool)
		goto err;

	INIT_LIST_HEAD(&t->recvmsg_queue);

	for (i = 0; i < t->recv_credit_max; i++) {
		recvmsg = mempool_alloc(t->recvmsg_mempool, KSMBD_DEFAULT_GFP);
		if (!recvmsg)
			goto err;
		recvmsg->transport = t;
		list_add(&recvmsg->list, &t->recvmsg_queue);
	}
	t->count_avail_recvmsg = t->recv_credit_max;

	return 0;
err:
	smb_direct_destroy_pools(t);
	return -ENOMEM;
}

static int smb_direct_create_qpair(struct smb_direct_transport *t,
				   struct ib_qp_cap *cap)
{
	int ret;
	struct ib_qp_init_attr qp_attr;
	int pages_per_rw;

	t->pd = ib_alloc_pd(t->cm_id->device, 0);
	if (IS_ERR(t->pd)) {
		pr_err("Can't create RDMA PD\n");
		ret = PTR_ERR(t->pd);
		t->pd = NULL;
		return ret;
	}

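	/*
	 * Size the send CQ for the send-credit target plus the RDMA R/W
	 * contexts, and the receive CQ for recv_credit_max; both are polled
	 * from workqueue context.
	 */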
	t->send_cq = ib_alloc_cq(t->cm_id->device, t,
				 smb_direct_send_credit_target + cap->max_rdma_ctxs,
				 0, IB_POLL_WORKQUEUE);
	if (IS_ERR(t->send_cq)) {
		pr_err("Can't create RDMA send CQ\n");
		ret = PTR_ERR(t->send_cq);
		t->send_cq = NULL;
		goto err;
	}

	t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
				 t->recv_credit_max, 0, IB_POLL_WORKQUEUE);
	if (IS_ERR(t->recv_cq)) {
		pr_err("Can't create RDMA recv CQ\n");
		ret = PTR_ERR(t->recv_cq);
		t->recv_cq = NULL;
		goto err;
	}

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.event_handler = smb_direct_qpair_handler;
	qp_attr.qp_context = t;
	qp_attr.cap = *cap;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = t->send_cq;
	qp_attr.recv_cq = t->recv_cq;
	qp_attr.port_num = ~0;

	ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
	if (ret) {
		pr_err("Can't create RDMA QP: %d\n", ret);
		goto err;
	}

	t->qp = t->cm_id->qp;
	t->cm_id->event_handler = smb_direct_cm_handler;

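	/*
	 * If a maximum-sized R/W payload needs more scatter/gather entries
	 * than the device can handle in a plain RDMA read (max_sgl_rd),
	 * pre-allocate a pool of fast-registration MRs, one per R/W credit,
	 * for the rdma_rw layer to draw from.
	 */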
	pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
	if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
		ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs,
				      t->max_rw_credits, IB_MR_TYPE_MEM_REG,
				      t->pages_per_rw_credit, 0);
		if (ret) {
			pr_err("failed to init mr pool count %d pages %d\n",
			       t->max_rw_credits, t->pages_per_rw_credit);
			goto err;
		}
	}

	return 0;
err:
	if (t->qp) {
		ib_destroy_qp(t->qp);
		t->qp = NULL;
	}
	if (t->recv_cq) {
		ib_destroy_cq(t->recv_cq);
		t->recv_cq = NULL;
	}
	if (t->send_cq) {
		ib_destroy_cq(t->send_cq);
		t->send_cq = NULL;
	}
	if (t->pd) {
		ib_dealloc_pd(t->pd);
		t->pd = NULL;
	}
	return ret;
}

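/*
 * Transport .prepare hook: wait for the client's SMB_DIRECT negotiate
 * request and adopt the connection parameters it advertises.
 */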
static int smb_direct_prepare(struct ksmbd_transport *t)
{
	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
	struct smb_direct_recvmsg *recvmsg;
	struct smb_direct_negotiate_req *req;
	int ret;

	ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
	ret = wait_event_interruptible_timeout(st->wait_status,
					       st->negotiation_requested ||
					       st->status == SMB_DIRECT_CS_DISCONNECTED,
					       SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
	if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
		return ret < 0 ? ret : -ETIMEDOUT;

	recvmsg = get_first_reassembly(st);
	if (!recvmsg)
		return -ECONNABORTED;

	ret = smb_direct_check_recvmsg(recvmsg);
	if (ret == -ECONNABORTED)
		goto out;

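	/*
	 * Clamp our send/receive sizes to what the peer advertises. For
	 * example, with the default max_recv_size of 1364 and
	 * recv_credit_max of 255, and a client preferred_send_size of at
	 * least 1364, the fragmented receive limit becomes
	 * (255 * 1364) / 2 = 173,910 bytes.
	 */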
	req = (struct smb_direct_negotiate_req *)recvmsg->packet;
	st->max_recv_size = min_t(int, st->max_recv_size,
				  le32_to_cpu(req->preferred_send_size));
	st->max_send_size = min_t(int, st->max_send_size,
				  le32_to_cpu(req->max_receive_size));
	st->max_fragmented_send_size =
		le32_to_cpu(req->max_fragmented_size);
	st->max_fragmented_recv_size =
		(st->recv_credit_max * st->max_recv_size) / 2;

	ret = smb_direct_send_negotiate_response(st, ret);
out:
	spin_lock_irq(&st->reassembly_queue_lock);
	st->reassembly_queue_length--;
	list_del(&recvmsg->list);
	spin_unlock_irq(&st->reassembly_queue_lock);
	put_recvmsg(st, recvmsg);

	return ret;
}

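/*
 * Bring up one SMB_DIRECT connection: compute transport parameters,
 * allocate the message pools, create the queue pair, then accept the
 * client and post the negotiate receive.
 */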
static int smb_direct_connect(struct smb_direct_transport *st)
{
	int ret;
	struct ib_qp_cap qp_cap;

	ret = smb_direct_init_params(st, &qp_cap);
	if (ret) {
		pr_err("Can't configure RDMA parameters\n");
		return ret;
	}

	ret = smb_direct_create_pools(st);
	if (ret) {
		pr_err("Can't init RDMA pool: %d\n", ret);
		return ret;
	}

	ret = smb_direct_create_qpair(st, &qp_cap);
	if (ret) {
		pr_err("Can't accept RDMA client: %d\n", ret);
		return ret;
	}

	ret = smb_direct_prepare_negotiation(st);
	if (ret) {
		pr_err("Can't negotiate: %d\n", ret);
		return ret;
	}
	return 0;
}

static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return false;
	if (attrs->max_fast_reg_page_list_len == 0)
		return false;
	return true;
}

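/*
 * Each incoming RDMA connection gets its own transport instance and a
 * dedicated "ksmbd:r<port>" handler thread; devices without FRWR support
 * are rejected up front.
 */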
static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
	struct smb_direct_transport *t;
	struct task_struct *handler;
	int ret;

	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
		ksmbd_debug(RDMA,
			    "Fast Registration Work Requests are not supported. device capabilities=%llx\n",
			    new_cm_id->device->attrs.device_cap_flags);
		return -EPROTONOSUPPORT;
	}

	t = alloc_transport(new_cm_id);
	if (!t)
		return -ENOMEM;

	ret = smb_direct_connect(t);
	if (ret)
		goto out_err;

	handler = kthread_run(ksmbd_conn_handler_loop,
			      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
			      smb_direct_port);
	if (IS_ERR(handler)) {
		ret = PTR_ERR(handler);
		pr_err("Can't start thread\n");
		goto out_err;
	}

	return 0;
out_err:
	free_transport(t);
	return ret;
}

static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
				     struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST: {
		int ret = smb_direct_handle_connect_request(cm_id);

		if (ret) {
			pr_err("Can't create transport: %d\n", ret);
			return ret;
		}

		ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
			    cm_id);
		break;
	}
	default:
		pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event), event->event);
		break;
	}
	return 0;
}

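/*
 * Bind an RDMA CM listener on INADDR_ANY for the configured SMB Direct
 * port (445 for InfiniBand, 5445 for iWARP) with a listen backlog of 10.
 */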
static int smb_direct_listen(int port)
{
	int ret;
	struct rdma_cm_id *cm_id;
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = htons(port),
	};

	cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
			       &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id)) {
		pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
		return PTR_ERR(cm_id);
	}

	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	if (ret) {
		pr_err("Can't bind: %d\n", ret);
		goto err;
	}

	smb_direct_listener.cm_id = cm_id;

	ret = rdma_listen(cm_id, 10);
	if (ret) {
		pr_err("Can't listen: %d\n", ret);
		goto err;
	}
	return 0;
err:
	smb_direct_listener.cm_id = NULL;
	rdma_destroy_id(cm_id);
	return ret;
}

static int smb_direct_ib_client_add(struct ib_device *ib_dev)
{
	struct smb_direct_device *smb_dev;

	/* Use port 5445 if the device is not an InfiniBand CA (e.g. iWARP) */
	if (ib_dev->node_type != RDMA_NODE_IB_CA)
		smb_direct_port = SMB_DIRECT_PORT_IWARP;

	if (!rdma_frwr_is_supported(&ib_dev->attrs))
		return 0;

	smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP);
	if (!smb_dev)
		return -ENOMEM;
	smb_dev->ib_dev = ib_dev;

	write_lock(&smb_direct_device_lock);
	list_add(&smb_dev->list, &smb_direct_device_list);
	write_unlock(&smb_direct_device_lock);

	ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
	return 0;
}

static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
					void *client_data)
{
	struct smb_direct_device *smb_dev, *tmp;

	write_lock(&smb_direct_device_lock);
	list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
		if (smb_dev->ib_dev == ib_dev) {
			list_del(&smb_dev->list);
			kfree(smb_dev);
			break;
		}
	}
	write_unlock(&smb_direct_device_lock);
}

static struct ib_client smb_direct_ib_client = {
	.name = "ksmbd_smb_direct_ib",
	.add = smb_direct_ib_client_add,
	.remove = smb_direct_ib_client_remove,
};

int ksmbd_rdma_init(void)
{
	int ret;

	smb_direct_listener.cm_id = NULL;

	ret = ib_register_client(&smb_direct_ib_client);
	if (ret) {
		pr_err("failed to ib_register_client\n");
		return ret;
	}

	/* When a client runs out of send credits, the server grants more by
	 * sending a packet through this workqueue. Making it high priority
	 * avoids a client being stuck, unable to send for lack of credits.
	 */
	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
					WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
	if (!smb_direct_wq)
		return -ENOMEM;

	ret = smb_direct_listen(smb_direct_port);
	if (ret) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
		pr_err("Can't listen: %d\n", ret);
		return ret;
	}

	ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
		    smb_direct_listener.cm_id);
	return 0;
}

void ksmbd_rdma_destroy(void)
{
	if (!smb_direct_listener.cm_id)
		return;

	ib_unregister_client(&smb_direct_ib_client);
	rdma_destroy_id(smb_direct_listener.cm_id);

	smb_direct_listener.cm_id = NULL;

	if (smb_direct_wq) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
	}
}

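/*
 * A netdev is considered RDMA capable if it backs a port of a registered
 * SMB Direct device, or if its driver exposes an ib_device that supports
 * fast-registration work requests.
 */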
bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
	struct smb_direct_device *smb_dev;
	int i;
	bool rdma_capable = false;

	read_lock(&smb_direct_device_lock);
	list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
			struct net_device *ndev;

			ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
			if (!ndev)
				continue;

			if (ndev == netdev) {
				dev_put(ndev);
				rdma_capable = true;
				goto out;
			}
			dev_put(ndev);
		}
	}
out:
	read_unlock(&smb_direct_device_lock);

	if (!rdma_capable) {
		struct ib_device *ibdev;

		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
		if (ibdev) {
			rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
			ib_device_put(ibdev);
		}
	}

	ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
		    netdev->name, str_true_false(rdma_capable));

	return rdma_capable;
}

static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
	.prepare = smb_direct_prepare,
	.disconnect = smb_direct_disconnect,
	.shutdown = smb_direct_shutdown,
	.writev = smb_direct_writev,
	.read = smb_direct_read,
	.rdma_read = smb_direct_rdma_read,
	.rdma_write = smb_direct_rdma_write,
	.free_transport = smb_direct_free_transport,
};