1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3
4 #include <linux/smp.h>
5 #include "dr_types.h"
6
/*
 * Send-ring tuning constants.
 * NOTE(review): QUEUE_SIZE and SIGNAL_PER_DIV_QUEUE are not referenced in the
 * part of the file visible here; presumably they size the send queue and
 * derive the signalling threshold at ring setup - confirm against the
 * ring-allocation code.
 */
#define QUEUE_SIZE 128
#define SIGNAL_PER_DIV_QUEUE 16
/*
 * dr_handle_pending_wc() switches to drain mode once pending_wqe reaches
 * signal_th * TH_NUMS_TO_DRAIN.
 */
#define TH_NUMS_TO_DRAIN 2
/* Number of objects dr_send_info_pool_fill() adds to a pool per call */
#define DR_SEND_INFO_POOL_SIZE 1000

/*
 * Status codes of a single CQ poll (see dr_cq_poll_one()):
 * a CQE was consumed, no CQE was available, or an error was seen.
 */
enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
13
/* Local data segment description for one WQE. */
struct dr_data_seg {
	/*
	 * Buffer address. Used both as a CPU pointer (memcpy source) and,
	 * after being rewritten to a DMA address, as the WQE data address.
	 */
	u64 addr;
	/* Number of bytes to transfer */
	u32 length;
	/* Local key placed in the WQE data segment */
	u32 lkey;
	/* IB_SEND_SIGNALED requests a CQ update for this WQE */
	unsigned int send_flags;
};
20
/* Selects which WQE sequence dr_post_send() emits for a postsend_info. */
enum send_info_type {
	/* RDMA write followed by an RDMA read */
	WRITE_ICM = 0,
	/* Single FLOW_TBL_ACCESS WQE carrying header-modify arguments */
	GTA_ARG = 1,
};
25
/* Everything needed to post one logical send on the send ring. */
struct postsend_info {
	/* Kind of request; zero-initialized instances are WRITE_ICM */
	enum send_info_type type;
	/* Data segment for the write (or argument) WQE */
	struct dr_data_seg write;
	/* Data segment for the read WQE that follows a WRITE_ICM write */
	struct dr_data_seg read;
	/* Remote address; for GTA_ARG it carries the argument id */
	u64 remote_addr;
	/* Remote key of the target memory (used for RDMA write/read) */
	u32 rkey;
};
33
/* Attributes consumed by the INIT -> RTR QP transition. */
struct dr_qp_rtr_attr {
	/* Destination GID attributes (mac, gid, roce_ver) */
	struct mlx5dr_cmd_gid_attr dgid_attr;
	/* Path MTU */
	enum ib_mtu mtu;
	/* Remote QP number */
	u32 qp_num;
	/* Written to primary_address_path.vhca_port_num */
	u16 port_num;
	/*
	 * NOTE(review): set by dr_prepare_qp_to_rts() but not read by
	 * dr_cmd_modify_qp_init2rtr(), which programs min_rnr_nak = 1.
	 */
	u8 min_rnr_timer;
	/* Source GID index (primary_address_path.src_addr_index) */
	u8 sgid_index;
	/* UDP source port; only programmed for RoCE v2 */
	u16 udp_src_port;
	/* Force-loopback flag (primary_address_path.fl) */
	u8 fl:1;
};
44
/* Attributes consumed by the RTR -> RTS QP transition. */
struct dr_qp_rts_attr {
	/*
	 * NOTE(review): set by dr_prepare_qp_to_rts() but not read by
	 * dr_cmd_modify_qp_rtr2rts(), which programs a fixed ack_timeout.
	 */
	u8 timeout;
	/* Written to qpc.retry_count */
	u8 retry_cnt;
	/* Written to qpc.rnr_retry */
	u8 rnr_retry;
};
50
/* Creation parameters for dr_create_rc_qp(). */
struct dr_qp_init_attr {
	/* CQ number used for both send and receive completions */
	u32 cqn;
	/* Protection domain number */
	u32 pdn;
	/* Requested send queue depth; rounded up to a power of two */
	u32 max_send_wr;
	/* UAR page whose index is programmed into the QP context */
	struct mlx5_uars_page *uar;
	/* Written to qpc.isolate_vl_tc */
	u8 isolate_vl_tc:1;
};
58
/*
 * Pool element wrapping one ste_send_info. Callers only see the embedded
 * ste_send_info; mlx5dr_send_info_free() recovers the wrapper via
 * container_of().
 */
struct mlx5dr_send_info_pool_obj {
	/* Payload handed out by mlx5dr_send_info_alloc() */
	struct mlx5dr_ste_send_info ste_send_info;
	/* Owning pool, so the object can be returned without extra context */
	struct mlx5dr_send_info_pool *pool;
	/* Linkage on the owning pool's free_list */
	struct list_head list_node;
};
64
/* Pool of pre-allocated send-info objects. */
struct mlx5dr_send_info_pool {
	/* Objects currently available for allocation */
	struct list_head free_list;
};
68
dr_send_info_pool_fill(struct mlx5dr_send_info_pool * pool)69 static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool)
70 {
71 struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
72 int i;
73
74 for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) {
75 pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL);
76 if (!pool_obj)
77 goto clean_pool;
78
79 pool_obj->pool = pool;
80 list_add_tail(&pool_obj->list_node, &pool->free_list);
81 }
82
83 return 0;
84
85 clean_pool:
86 list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
87 list_del(&pool_obj->list_node);
88 kfree(pool_obj);
89 }
90
91 return -ENOMEM;
92 }
93
dr_send_info_pool_destroy(struct mlx5dr_send_info_pool * pool)94 static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool)
95 {
96 struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
97
98 list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
99 list_del(&pool_obj->list_node);
100 kfree(pool_obj);
101 }
102
103 kfree(pool);
104 }
105
mlx5dr_send_info_pool_destroy(struct mlx5dr_domain * dmn)106 void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn)
107 {
108 dr_send_info_pool_destroy(dmn->send_info_pool_tx);
109 dr_send_info_pool_destroy(dmn->send_info_pool_rx);
110 }
111
dr_send_info_pool_create(void)112 static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void)
113 {
114 struct mlx5dr_send_info_pool *pool;
115 int ret;
116
117 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
118 if (!pool)
119 return NULL;
120
121 INIT_LIST_HEAD(&pool->free_list);
122
123 ret = dr_send_info_pool_fill(pool);
124 if (ret) {
125 kfree(pool);
126 return NULL;
127 }
128
129 return pool;
130 }
131
mlx5dr_send_info_pool_create(struct mlx5dr_domain * dmn)132 int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn)
133 {
134 dmn->send_info_pool_rx = dr_send_info_pool_create();
135 if (!dmn->send_info_pool_rx)
136 return -ENOMEM;
137
138 dmn->send_info_pool_tx = dr_send_info_pool_create();
139 if (!dmn->send_info_pool_tx) {
140 dr_send_info_pool_destroy(dmn->send_info_pool_rx);
141 return -ENOMEM;
142 }
143
144 return 0;
145 }
146
147 struct mlx5dr_ste_send_info
mlx5dr_send_info_alloc(struct mlx5dr_domain * dmn,enum mlx5dr_domain_nic_type nic_type)148 *mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn,
149 enum mlx5dr_domain_nic_type nic_type)
150 {
151 struct mlx5dr_send_info_pool_obj *pool_obj;
152 struct mlx5dr_send_info_pool *pool;
153 int ret;
154
155 pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? dmn->send_info_pool_rx :
156 dmn->send_info_pool_tx;
157
158 if (unlikely(list_empty(&pool->free_list))) {
159 ret = dr_send_info_pool_fill(pool);
160 if (ret)
161 return NULL;
162 }
163
164 pool_obj = list_first_entry_or_null(&pool->free_list,
165 struct mlx5dr_send_info_pool_obj,
166 list_node);
167
168 if (likely(pool_obj)) {
169 list_del_init(&pool_obj->list_node);
170 } else {
171 WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool");
172 return NULL;
173 }
174
175 return &pool_obj->ste_send_info;
176 }
177
mlx5dr_send_info_free(struct mlx5dr_ste_send_info * ste_send_info)178 void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info)
179 {
180 struct mlx5dr_send_info_pool_obj *pool_obj;
181
182 pool_obj = container_of(ste_send_info,
183 struct mlx5dr_send_info_pool_obj,
184 ste_send_info);
185
186 list_add(&pool_obj->list_node, &pool_obj->pool->free_list);
187 }
188
dr_parse_cqe(struct mlx5dr_cq * dr_cq,struct mlx5_cqe64 * cqe64)189 static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
190 {
191 unsigned int idx;
192 u8 opcode;
193
194 opcode = get_cqe_opcode(cqe64);
195 if (opcode == MLX5_CQE_REQ_ERR) {
196 idx = be16_to_cpu(cqe64->wqe_counter) &
197 (dr_cq->qp->sq.wqe_cnt - 1);
198 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
199 } else if (opcode == MLX5_CQE_RESP_ERR) {
200 ++dr_cq->qp->sq.cc;
201 } else {
202 idx = be16_to_cpu(cqe64->wqe_counter) &
203 (dr_cq->qp->sq.wqe_cnt - 1);
204 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
205
206 return CQ_OK;
207 }
208
209 return CQ_POLL_ERR;
210 }
211
dr_cq_poll_one(struct mlx5dr_cq * dr_cq)212 static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
213 {
214 struct mlx5_cqe64 *cqe64;
215 int err;
216
217 cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
218 if (!cqe64) {
219 if (unlikely(dr_cq->mdev->state ==
220 MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
221 mlx5_core_dbg_once(dr_cq->mdev,
222 "Polling CQ while device is shutting down\n");
223 return CQ_POLL_ERR;
224 }
225 return CQ_EMPTY;
226 }
227
228 mlx5_cqwq_pop(&dr_cq->wq);
229 err = dr_parse_cqe(dr_cq, cqe64);
230 mlx5_cqwq_update_db_record(&dr_cq->wq);
231
232 return err;
233 }
234
dr_poll_cq(struct mlx5dr_cq * dr_cq,int ne)235 static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
236 {
237 int npolled;
238 int err = 0;
239
240 for (npolled = 0; npolled < ne; ++npolled) {
241 err = dr_cq_poll_one(dr_cq);
242 if (err != CQ_OK)
243 break;
244 }
245
246 return err == CQ_POLL_ERR ? err : npolled;
247 }
248
/*
 * Allocate and create an RC QP for the send ring.
 *
 * Allocates the driver QP object, its work queue and the wqe_head tracking
 * array, then issues CREATE_QP with a QP context built from @attr.
 *
 * Return: the new QP, or NULL on any failure (all partially acquired
 * resources are released).
 */
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
					 struct dr_qp_init_attr *attr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
	struct mlx5_wq_param wqp;
	struct mlx5dr_qp *dr_qp;
	int inlen;
	void *qpc;
	void *in;
	int err;

	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
	if (!dr_qp)
		return NULL;

	/* Only the NUMA node fields of wqp are filled in here */
	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	dr_qp->rq.pc = 0;
	dr_qp->rq.cc = 0;
	dr_qp->rq.wqe_cnt = 256;
	dr_qp->sq.pc = 0;
	dr_qp->sq.cc = 0;
	dr_qp->sq.head = 0;
	/* Power-of-two depth lets indices be computed with a mask */
	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

	/* Temporary QPC carrying only the queue geometry for WQ creation */
	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
				&dr_qp->wq_ctrl);
	if (err) {
		mlx5_core_warn(mdev, "Can't create QP WQ\n");
		goto err_wq;
	}

	/* One entry per SQ slot; filled in by dr_rdma_segments() */
	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
				     sizeof(dr_qp->sq.wqe_head[0]),
				     GFP_KERNEL);

	if (!dr_qp->sq.wqe_head) {
		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
		goto err_wqe_head;
	}

	/* Command input: fixed part plus one PAS entry per WQ buffer page */
	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		dr_qp->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
	MLX5_SET(qpc, qpc, pd, attr->pdn);
	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);
	/* The same CQ serves send and receive completions */
	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
							 in, pas));

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	/* qpn is read before err is checked; it is only used on success */
	dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	kvfree(in);
	if (err)
		goto err_in;
	dr_qp->uar = attr->uar;

	return dr_qp;

	/* Unwind in reverse order of acquisition */
err_in:
	kfree(dr_qp->sq.wqe_head);
err_wqe_head:
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
	kfree(dr_qp);
	return NULL;
}
346
/*
 * Destroy a QP created by dr_create_rc_qp(): issue DESTROY_QP for its qpn,
 * then release the wqe_head array, the work queue and the QP object.
 * The result of the DESTROY_QP command is not checked.
 */
static void dr_destroy_qp(struct mlx5_core_dev *mdev,
			  struct mlx5dr_qp *dr_qp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
	mlx5_cmd_exec_in(mdev, destroy_qp, in);

	/* Host-side resources, in reverse order of allocation */
	kfree(dr_qp->sq.wqe_head);
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
	kfree(dr_qp);
}
360
/*
 * Ring the doorbell for the send queue: publish the current producer counter
 * in the doorbell record, then write @ctrl to the UAR at MLX5_BF_OFFSET.
 */
static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
	/* Barrier before the doorbell record store (presumably orders the
	 * caller's WQE writes ahead of it - confirm)
	 */
	dma_wmb();
	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);

	/* The doorbell record store above must be ordered before the UAR
	 * write below; after that write the HW is aware of the new work
	 */
	wmb();

	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}
371
372 static void
dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg * wq_ctrl,u32 remote_addr,struct dr_data_seg * data_seg,int * size)373 dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
374 u32 remote_addr,
375 struct dr_data_seg *data_seg,
376 int *size)
377 {
378 struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
379 struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;
380
381 wq_ctrl->general_id = cpu_to_be32(remote_addr);
382 wq_flow_seg = (void *)(wq_ctrl + 1);
383
384 /* mlx5_wqe_flow_update_ctrl_seg - all reserved */
385 memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
386 wq_arg_seg = (void *)(wq_flow_seg + 1);
387
388 memcpy(wq_arg_seg->argument_list,
389 (void *)(uintptr_t)data_seg->addr,
390 data_seg->length);
391
392 *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */
393 sizeof(*wq_flow_seg) + /* WQE flow update ctrl seg - reserved */
394 sizeof(*wq_arg_seg)) / /* WQE hdr modify arg seg - data */
395 MLX5_SEND_WQE_DS;
396 }
397
398 static void
dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg * wq_ctrl,u64 remote_addr,u32 rkey,struct dr_data_seg * data_seg,unsigned int * size)399 dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
400 u64 remote_addr,
401 u32 rkey,
402 struct dr_data_seg *data_seg,
403 unsigned int *size)
404 {
405 struct mlx5_wqe_raddr_seg *wq_raddr;
406 struct mlx5_wqe_data_seg *wq_dseg;
407
408 wq_raddr = (void *)(wq_ctrl + 1);
409
410 wq_raddr->raddr = cpu_to_be64(remote_addr);
411 wq_raddr->rkey = cpu_to_be32(rkey);
412 wq_raddr->reserved = 0;
413
414 wq_dseg = (void *)(wq_raddr + 1);
415
416 wq_dseg->byte_count = cpu_to_be32(data_seg->length);
417 wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
418 wq_dseg->addr = cpu_to_be64(data_seg->addr);
419
420 *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */
421 sizeof(*wq_dseg) + /* WQE data segment */
422 sizeof(*wq_raddr)) / /* WQE remote addr segment */
423 MLX5_SEND_WQE_DS;
424 }
425
dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg * wq_ctrl,struct dr_data_seg * data_seg)426 static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
427 struct dr_data_seg *data_seg)
428 {
429 wq_ctrl->signature = 0;
430 wq_ctrl->rsvd[0] = 0;
431 wq_ctrl->rsvd[1] = 0;
432 wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
433 MLX5_WQE_CTRL_CQ_UPDATE : 0;
434 wq_ctrl->imm = 0;
435 }
436
/*
 * Build one WQE at the current producer position of the send queue and
 * optionally ring the doorbell.
 *
 * @dr_qp:       QP to post on
 * @remote_addr: remote address (or argument id for FLOW_TBL_ACCESS)
 * @rkey:        remote key (RDMA read/write only)
 * @data_seg:    local data description and send flags
 * @opcode:      MLX5_OPCODE_RDMA_READ, MLX5_OPCODE_RDMA_WRITE or
 *               MLX5_OPCODE_FLOW_TBL_ACCESS; anything else triggers a WARN
 *               and nothing is posted
 * @notify_hw:   ring the doorbell after the WQE is built
 */
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
			     u32 rkey, struct dr_data_seg *data_seg,
			     u32 opcode, bool notify_hw)
{
	unsigned int slot = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
	struct mlx5_wqe_ctrl_seg *wq_ctrl;
	unsigned int size;
	int opcode_mod;

	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, slot);
	dr_set_ctrl_seg(wq_ctrl, data_seg);

	if (opcode == MLX5_OPCODE_RDMA_READ ||
	    opcode == MLX5_OPCODE_RDMA_WRITE) {
		opcode_mod = 0;
		dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
						  rkey, data_seg, &size);
	} else if (opcode == MLX5_OPCODE_FLOW_TBL_ACCESS) {
		opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
		dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
							data_seg, &size);
	} else {
		WARN(true, "illegal opcode %d", opcode);
		return;
	}

	/* ---------------------------------------------------------
	 * |opcode_mod (8 bits)|wqe_index (16 bits)|opcode (8 bits)|
	 * ---------------------------------------------------------
	 */
	wq_ctrl->opmod_idx_opcode =
		cpu_to_be32((opcode_mod << 24) |
			    ((dr_qp->sq.pc & 0xffff) << 8) |
			    opcode);
	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);

	/* Advance the producer only after the WQE index has been encoded */
	dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
	dr_qp->sq.wqe_head[slot] = dr_qp->sq.head++;

	if (notify_hw)
		dr_cmd_notify_hw(dr_qp, wq_ctrl);
}
483
dr_post_send(struct mlx5dr_qp * dr_qp,struct postsend_info * send_info)484 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
485 {
486 if (send_info->type == WRITE_ICM) {
487 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
488 &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
489 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
490 &send_info->read, MLX5_OPCODE_RDMA_READ, true);
491 } else { /* GTA_ARG */
492 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
493 &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
494 }
495
496 }
497
498 /**
499 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
500 * with send_list parameters:
501 *
502 * @ste: The data that attached to this specific ste
503 * @size: of data to write
504 * @offset: of the data from start of the hw_ste entry
505 * @data: data
506 * @ste_info: ste to be sent with send_list
507 * @send_list: to append into it
508 * @copy_data: if true indicates that the data should be kept because
509 * it's not backuped any where (like in re-hash).
510 * if false, it lets the data to be updated after
511 * it was added to the list.
512 */
mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste * ste,u16 size,u16 offset,u8 * data,struct mlx5dr_ste_send_info * ste_info,struct list_head * send_list,bool copy_data)513 void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
514 u16 offset, u8 *data,
515 struct mlx5dr_ste_send_info *ste_info,
516 struct list_head *send_list,
517 bool copy_data)
518 {
519 ste_info->size = size;
520 ste_info->ste = ste;
521 ste_info->offset = offset;
522
523 if (copy_data) {
524 memcpy(ste_info->data_cont, data, size);
525 ste_info->data = ste_info->data_cont;
526 } else {
527 ste_info->data = data;
528 }
529
530 list_add_tail(&ste_info->send_list, send_list);
531 }
532
533 /* The function tries to consume one wc each time, unless the queue is full, in
534 * that case, which means that the hw is behind the sw in a full queue len
535 * the function will drain the cq till it empty.
536 */
dr_handle_pending_wc(struct mlx5dr_domain * dmn,struct mlx5dr_send_ring * send_ring)537 static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
538 struct mlx5dr_send_ring *send_ring)
539 {
540 bool is_drain = false;
541 int ne;
542
543 if (send_ring->pending_wqe < send_ring->signal_th)
544 return 0;
545
546 /* Queue is full start drain it */
547 if (send_ring->pending_wqe >=
548 dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
549 is_drain = true;
550
551 do {
552 ne = dr_poll_cq(send_ring->cq, 1);
553 if (unlikely(ne < 0)) {
554 mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
555 send_ring->qp->qpn);
556 send_ring->err_state = true;
557 return ne;
558 } else if (ne == 1) {
559 send_ring->pending_wqe -= send_ring->signal_th;
560 }
561 } while (ne == 1 ||
562 (is_drain && send_ring->pending_wqe >= send_ring->signal_th));
563
564 return 0;
565 }
566
dr_fill_write_args_segs(struct mlx5dr_send_ring * send_ring,struct postsend_info * send_info)567 static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
568 struct postsend_info *send_info)
569 {
570 send_ring->pending_wqe++;
571
572 if (send_ring->pending_wqe % send_ring->signal_th == 0)
573 send_info->write.send_flags |= IB_SEND_SIGNALED;
574 else
575 send_info->write.send_flags = 0;
576 }
577
dr_fill_write_icm_segs(struct mlx5dr_domain * dmn,struct mlx5dr_send_ring * send_ring,struct postsend_info * send_info)578 static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
579 struct mlx5dr_send_ring *send_ring,
580 struct postsend_info *send_info)
581 {
582 u32 buff_offset;
583
584 if (send_info->write.length > dmn->info.max_inline_size) {
585 buff_offset = (send_ring->tx_head &
586 (dmn->send_ring->signal_th - 1)) *
587 send_ring->max_post_send_size;
588 /* Copy to ring mr */
589 memcpy(send_ring->buf + buff_offset,
590 (void *)(uintptr_t)send_info->write.addr,
591 send_info->write.length);
592 send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
593 send_info->write.lkey = send_ring->mr->mkey;
594
595 send_ring->tx_head++;
596 }
597
598 send_ring->pending_wqe++;
599
600 if (send_ring->pending_wqe % send_ring->signal_th == 0)
601 send_info->write.send_flags |= IB_SEND_SIGNALED;
602
603 send_ring->pending_wqe++;
604 send_info->read.length = send_info->write.length;
605
606 /* Read into dedicated sync buffer */
607 send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr;
608 send_info->read.lkey = send_ring->sync_mr->mkey;
609
610 if (send_ring->pending_wqe % send_ring->signal_th == 0)
611 send_info->read.send_flags = IB_SEND_SIGNALED;
612 else
613 send_info->read.send_flags = 0;
614 }
615
dr_fill_data_segs(struct mlx5dr_domain * dmn,struct mlx5dr_send_ring * send_ring,struct postsend_info * send_info)616 static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
617 struct mlx5dr_send_ring *send_ring,
618 struct postsend_info *send_info)
619 {
620 if (send_info->type == WRITE_ICM)
621 dr_fill_write_icm_segs(dmn, send_ring, send_info);
622 else /* args */
623 dr_fill_write_args_segs(send_ring, send_info);
624 }
625
dr_postsend_icm_data(struct mlx5dr_domain * dmn,struct postsend_info * send_info)626 static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
627 struct postsend_info *send_info)
628 {
629 struct mlx5dr_send_ring *send_ring = dmn->send_ring;
630 int ret;
631
632 if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
633 send_ring->err_state)) {
634 mlx5_core_dbg_once(dmn->mdev,
635 "Skipping post send: QP err state: %d, device state: %d\n",
636 send_ring->err_state, dmn->mdev->state);
637 return 0;
638 }
639
640 spin_lock(&send_ring->lock);
641
642 ret = dr_handle_pending_wc(dmn, send_ring);
643 if (ret)
644 goto out_unlock;
645
646 dr_fill_data_segs(dmn, send_ring, send_info);
647 dr_post_send(send_ring->qp, send_info);
648
649 out_unlock:
650 spin_unlock(&send_ring->lock);
651 return ret;
652 }
653
/*
 * Work out how a hash table chunk is split into posts and allocate the
 * staging buffer for one post.
 *
 * @data:       out: zeroed buffer large enough for one post (kvfree() it)
 * @byte_size:  in/out: overwritten with max_post_send_size only when the
 *              chunk does not fit in a single post; otherwise left as is
 * @iterations: out: number of posts needed
 * @num_stes:   out: number of STEs covered by one post
 *
 * Return: 0 on success, -ENOMEM if the buffer cannot be allocated.
 */
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
				   struct mlx5dr_ste_htbl *htbl,
				   u8 **data,
				   u32 *byte_size,
				   int *iterations,
				   int *num_stes)
{
	u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int alloc_size;

	if (chunk_byte_size <= dmn->send_ring->max_post_send_size) {
		/* Whole chunk goes out in one post */
		*iterations = 1;
		*num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
		alloc_size = *num_stes * DR_STE_SIZE;
	} else {
		/* Split into posts of the maximum size */
		*byte_size = dmn->send_ring->max_post_send_size;
		*iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
		*num_stes = *byte_size / DR_STE_SIZE;
		alloc_size = *byte_size;
	}

	*data = kvzalloc(alloc_size, GFP_KERNEL);

	return *data ? 0 : -ENOMEM;
}
681
682 /**
683 * mlx5dr_send_postsend_ste: write size bytes into offset from the hw cm.
684 *
685 * @dmn: Domain
686 * @ste: The ste struct that contains the data (at
687 * least part of it)
688 * @data: The real data to send size data
689 * @size: for writing.
690 * @offset: The offset from the icm mapped data to
691 * start write to this for write only part of the
692 * buffer.
693 *
694 * Return: 0 on success.
695 */
mlx5dr_send_postsend_ste(struct mlx5dr_domain * dmn,struct mlx5dr_ste * ste,u8 * data,u16 size,u16 offset)696 int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
697 u8 *data, u16 size, u16 offset)
698 {
699 struct postsend_info send_info = {};
700
701 mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);
702
703 send_info.write.addr = (uintptr_t)data;
704 send_info.write.length = size;
705 send_info.write.lkey = 0;
706 send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
707 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);
708
709 return dr_postsend_icm_data(dmn, &send_info);
710 }
711
/*
 * Write a whole hash table to its ICM chunk.
 *
 * Unused entries are written as @formatted_ste. Used entries are rebuilt from
 * the reduced STE kept in hw_ste_arr with @mask appended. The table is sent
 * in as many posts as dr_get_tbl_copy_details() dictates.
 *
 * Return: 0 on success, negative value on allocation or post failure.
 */
int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
			      struct mlx5dr_ste_htbl *htbl,
			      u8 *formatted_ste, u8 *mask)
{
	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int num_stes_per_iter;
	int iterations;
	u8 *data;
	int ret;
	int i;
	int j;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes_per_iter);
	if (ret)
		return ret;

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);

	/* Send the data iteration times */
	for (i = 0; i < iterations; i++) {
		u32 first_ste = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		/* Stage every STE of this post; used ones also need the
		 * bit mask appended
		 */
		for (j = 0; j < num_stes_per_iter; j++) {
			struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[first_ste + j];
			u8 *dst = data + j * DR_STE_SIZE;

			if (mlx5dr_ste_is_not_used(ste)) {
				memcpy(dst, formatted_ste, DR_STE_SIZE);
				continue;
			}

			/* Copy data */
			memcpy(dst,
			       htbl->chunk->hw_ste_arr +
			       DR_STE_SIZE_REDUCED * (first_ste + j),
			       DR_STE_SIZE_REDUCED);
			/* Copy bit_mask */
			memcpy(dst + DR_STE_SIZE_REDUCED,
			       mask, DR_STE_SIZE_MASK);
			/* Only when we have mask we need to re-arrange the STE */
			mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
							dst,
							DR_STE_SIZE);
		}

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + first_ste);
		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			break;
	}

	kvfree(data);
	return ret;
}
778
779 /* Initialize htble with default STEs */
mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain * dmn,struct mlx5dr_ste_htbl * htbl,u8 * ste_init_data,bool update_hw_ste)780 int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
781 struct mlx5dr_ste_htbl *htbl,
782 u8 *ste_init_data,
783 bool update_hw_ste)
784 {
785 u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
786 int iterations;
787 int num_stes;
788 u8 *copy_dst;
789 u8 *data;
790 int ret;
791 int i;
792
793 ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
794 &iterations, &num_stes);
795 if (ret)
796 return ret;
797
798 if (update_hw_ste) {
799 /* Copy the reduced STE to hash table ste_arr */
800 for (i = 0; i < num_stes; i++) {
801 copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
802 memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
803 }
804 }
805
806 mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);
807
808 /* Copy the same STE on the data buffer */
809 for (i = 0; i < num_stes; i++) {
810 copy_dst = data + i * DR_STE_SIZE;
811 memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
812 }
813
814 /* Send the data iteration times */
815 for (i = 0; i < iterations; i++) {
816 u8 ste_index = i * (byte_size / DR_STE_SIZE);
817 struct postsend_info send_info = {};
818
819 send_info.write.addr = (uintptr_t)data;
820 send_info.write.length = byte_size;
821 send_info.write.lkey = 0;
822 send_info.remote_addr =
823 mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
824 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
825
826 ret = dr_postsend_icm_data(dmn, &send_info);
827 if (ret)
828 goto out_free;
829 }
830
831 out_free:
832 kvfree(data);
833 return ret;
834 }
835
mlx5dr_send_postsend_action(struct mlx5dr_domain * dmn,struct mlx5dr_action * action)836 int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
837 struct mlx5dr_action *action)
838 {
839 struct postsend_info send_info = {};
840
841 send_info.write.addr = (uintptr_t)action->rewrite->data;
842 send_info.write.length = action->rewrite->num_of_actions *
843 DR_MODIFY_ACTION_SIZE;
844 send_info.write.lkey = 0;
845 send_info.remote_addr =
846 mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
847 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);
848
849 return dr_postsend_icm_data(dmn, &send_info);
850 }
851
/*
 * Write @num_of_actions modify-header actions from @data to the start of the
 * given ICM chunk.
 *
 * Return: 0 on success, negative value on failure.
 */
int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
				 struct mlx5dr_icm_chunk *chunk,
				 u16 num_of_actions,
				 u8 *data)
{
	struct postsend_info send_info = {};

	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk);
	send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk);
	send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE;
	send_info.write.addr = (uintptr_t)data;

	return dr_postsend_icm_data(dmn, &send_info);
}
871
/*
 * Send header-modify argument data in pieces of at most
 * DR_ACTION_CACHE_LINE_SIZE bytes, one GTA_ARG request per piece. Piece N is
 * addressed to argument id @arg_id + N. At least one request is always
 * posted.
 *
 * Return: 0 on success, negative value from the first failing post.
 */
int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
			      u16 num_of_actions, u8 *actions_data)
{
	int data_len = num_of_actions * DR_MODIFY_ACTION_SIZE;
	u64 addr = (uintptr_t)actions_data;
	int iter = 0;
	int cur_sent;
	int ret;

	do {
		struct postsend_info send_info = {};

		cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE);

		send_info.type = GTA_ARG;
		send_info.write.addr = addr;
		send_info.write.length = cur_sent;
		send_info.write.lkey = 0;
		send_info.remote_addr = arg_id + iter;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			break;

		/* Move on to the next piece */
		iter++;
		addr += cur_sent;
		data_len -= cur_sent;
	} while (data_len > 0);

	return ret;
}
904
dr_modify_qp_rst2init(struct mlx5_core_dev * mdev,struct mlx5dr_qp * dr_qp,int port)905 static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
906 struct mlx5dr_qp *dr_qp,
907 int port)
908 {
909 u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
910 void *qpc;
911
912 qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
913
914 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
915 MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
916 MLX5_SET(qpc, qpc, rre, 1);
917 MLX5_SET(qpc, qpc, rwe, 1);
918
919 MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
920 MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
921
922 return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
923 }
924
dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev * mdev,struct mlx5dr_qp * dr_qp,struct dr_qp_rts_attr * attr)925 static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
926 struct mlx5dr_qp *dr_qp,
927 struct dr_qp_rts_attr *attr)
928 {
929 u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
930 void *qpc;
931
932 qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
933
934 MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
935
936 MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
937 MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
938 MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
939
940 MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
941 MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
942
943 return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
944 }
945
dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev * mdev,struct mlx5dr_qp * dr_qp,struct dr_qp_rtr_attr * attr)946 static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
947 struct mlx5dr_qp *dr_qp,
948 struct dr_qp_rtr_attr *attr)
949 {
950 u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
951 void *qpc;
952
953 qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
954
955 MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
956
957 MLX5_SET(qpc, qpc, mtu, attr->mtu);
958 MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
959 MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
960 memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
961 attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
962 memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
963 attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
964 MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
965 attr->sgid_index);
966
967 if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
968 MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
969 attr->udp_src_port);
970
971 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
972 MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
973 MLX5_SET(qpc, qpc, min_rnr_nak, 1);
974
975 MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
976 MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
977
978 return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
979 }
980
dr_send_allow_fl(struct mlx5dr_cmd_caps * caps)981 static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
982 {
983 /* Check whether RC RoCE QP creation with force loopback is allowed.
984 * There are two separate capability bits for this:
985 * - force loopback when RoCE is enabled
986 * - force loopback when RoCE is disabled
987 */
988 return ((caps->roce_caps.roce_en &&
989 caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
990 (!caps->roce_caps.roce_en &&
991 caps->roce_caps.fl_rc_qp_when_roce_disabled));
992 }
993
dr_prepare_qp_to_rts(struct mlx5dr_domain * dmn)994 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
995 {
996 struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
997 struct dr_qp_rts_attr rts_attr = {};
998 struct dr_qp_rtr_attr rtr_attr = {};
999 enum ib_mtu mtu = IB_MTU_1024;
1000 u16 gid_index = 0;
1001 int port = 1;
1002 int ret;
1003
1004 /* Init */
1005 ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
1006 if (ret) {
1007 mlx5dr_err(dmn, "Failed modify QP rst2init\n");
1008 return ret;
1009 }
1010
1011 /* RTR */
1012 rtr_attr.mtu = mtu;
1013 rtr_attr.qp_num = dr_qp->qpn;
1014 rtr_attr.min_rnr_timer = 12;
1015 rtr_attr.port_num = port;
1016 rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;
1017
1018 /* If QP creation with force loopback is allowed, then there
1019 * is no need for GID index when creating the QP.
1020 * Otherwise we query GID attributes and use GID index.
1021 */
1022 rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
1023 if (!rtr_attr.fl) {
1024 ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
1025 &rtr_attr.dgid_attr);
1026 if (ret)
1027 return ret;
1028
1029 rtr_attr.sgid_index = gid_index;
1030 }
1031
1032 ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
1033 if (ret) {
1034 mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
1035 return ret;
1036 }
1037
1038 /* RTS */
1039 rts_attr.timeout = 14;
1040 rts_attr.retry_cnt = 7;
1041 rts_attr.rnr_retry = 7;
1042
1043 ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
1044 if (ret) {
1045 mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
1046 return ret;
1047 }
1048
1049 return 0;
1050 }
1051
dr_cq_complete(struct mlx5_core_cq * mcq,struct mlx5_eqe * eqe)1052 static void dr_cq_complete(struct mlx5_core_cq *mcq,
1053 struct mlx5_eqe *eqe)
1054 {
1055 pr_err("CQ completion CQ: #%u\n", mcq->cqn);
1056 }
1057
/* Create a completion queue.
 *
 * @mdev: core device the CQ is created on
 * @uar:  UAR page; its index is written into the CQ context
 * @ncqe: requested number of CQEs, rounded up to a power of two
 *
 * Returns the new CQ, or NULL if any step fails (allocations, work queue
 * creation, EQ number lookup or the create-CQ command).
 */
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
				      struct mlx5_uars_page *uar,
				      size_t ncqe)
{
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	u32 wq_cqc[MLX5_ST_SZ_DW(cqc)] = {};
	struct mlx5_wq_param wq_param;
	struct mlx5_core_cq *mcq;
	struct mlx5dr_cq *cq;
	int log_size, cmd_len;
	void *cq_ctx, *cmd_in;
	int eqn, err;
	u32 idx;

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return NULL;

	mcq = &cq->mcq;

	ncqe = roundup_pow_of_two(ncqe);
	log_size = ilog2(ncqe);
	MLX5_SET(cqc, wq_cqc, log_cq_size, log_size);

	wq_param.buf_numa_node = mdev->priv.numa_node;
	wq_param.db_numa_node = mdev->priv.numa_node;

	err = mlx5_cqwq_create(mdev, &wq_param, wq_cqc, &cq->wq,
			       &cq->wq_ctrl);
	if (err)
		goto free_cq;

	/* Stamp every CQE with the invalid opcode and the owner mask */
	for (idx = 0; idx < mlx5_cqwq_get_size(&cq->wq); idx++) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, idx);

		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
	}

	/* The command carries one 64-bit page address per buffer page */
	cmd_len = MLX5_ST_SZ_BYTES(create_cq_in) +
		  sizeof(u64) * cq->wq_ctrl.buf.npages;
	cmd_in = kvzalloc(cmd_len, GFP_KERNEL);
	if (!cmd_in)
		goto destroy_wq;

	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
	if (err)
		goto free_cmd;

	cq_ctx = MLX5_ADDR_OF(create_cq_in, cmd_in, cq_context);
	MLX5_SET(cqc, cq_ctx, log_cq_size, log_size);
	MLX5_SET(cqc, cq_ctx, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cq_ctx, uar_page, uar->index);
	MLX5_SET(cqc, cq_ctx, log_page_size,
		 cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cq_ctx, dbr_addr, cq->wq_ctrl.db.dma);

	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_cq_in,
							 cmd_in, pas));

	mcq->comp = dr_cq_complete;

	err = mlx5_core_create_cq(mdev, mcq, cmd_in, cmd_len,
				  out, sizeof(out));
	if (err)
		goto free_cmd;

	kvfree(cmd_in);

	mcq->cqe_sz = 64;
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	*mcq->set_ci_db = 0;

	/* Write a non-zero value to the arm doorbell so that the HW does
	 * not run db-recovery on a CQ that is used in polling mode.
	 */
	*mcq->arm_db = cpu_to_be32(2 << 28);

	mcq->vector = 0;
	mcq->uar = uar;
	cq->mdev = mdev;

	return cq;

free_cmd:
	kvfree(cmd_in);
destroy_wq:
	mlx5_wq_destroy(&cq->wq_ctrl);
free_cq:
	kfree(cq);
	return NULL;
}
1145
dr_destroy_cq(struct mlx5_core_dev * mdev,struct mlx5dr_cq * cq)1146 static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
1147 {
1148 mlx5_core_destroy_cq(mdev, &cq->mcq);
1149 mlx5_wq_destroy(&cq->wq_ctrl);
1150 kfree(cq);
1151 }
1152
/* Create a memory key on protection domain @pdn and store it in @mkey.
 *
 * The key uses the PA access mode with length64 set and has the
 * a/rw/rr/lw/lr access bits enabled.
 *
 * Returns the result of mlx5_core_create_mkey().
 */
static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
{
	u32 cmd[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkey_ctx = MLX5_ADDR_OF(create_mkey_in, cmd,
				      memory_key_mkey_entry);

	/* Addressing and ownership */
	MLX5_SET(mkc, mkey_ctx, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkey_ctx, length64, 1);
	MLX5_SET(mkc, mkey_ctx, pd, pdn);
	MLX5_SET(mkc, mkey_ctx, qpn, 0xffffff);

	/* Access permissions */
	MLX5_SET(mkc, mkey_ctx, a, 1);
	MLX5_SET(mkc, mkey_ctx, rw, 1);
	MLX5_SET(mkc, mkey_ctx, rr, 1);
	MLX5_SET(mkc, mkey_ctx, lw, 1);
	MLX5_SET(mkc, mkey_ctx, lr, 1);

	return mlx5_core_create_mkey(mdev, mkey, cmd, sizeof(cmd));
}
1172
/* Register @buf (@size bytes) for device access.
 *
 * The buffer is DMA-mapped bidirectionally and a memory key is created on
 * protection domain @pdn. The returned descriptor records the DMA address,
 * the size, the CPU address and the key.
 *
 * Returns the descriptor, or NULL on allocation, mapping or key-creation
 * failure (nothing stays mapped or allocated in that case).
 */
static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
				   u32 pdn, void *buf, size_t size)
{
	struct mlx5dr_mr *mr;
	struct device *dev;
	dma_addr_t dma;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return NULL;

	dev = mlx5_core_dma_dev(mdev);
	dma = dma_map_single(dev, buf, size, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, dma)) {
		mlx5_core_warn(mdev, "Can't dma buf\n");
		goto free_mr;
	}

	if (dr_create_mkey(mdev, pdn, &mr->mkey)) {
		mlx5_core_warn(mdev, "Can't create mkey\n");
		goto unmap_buf;
	}

	mr->dma_addr = dma;
	mr->size = size;
	mr->addr = buf;

	return mr;

unmap_buf:
	dma_unmap_single(dev, dma, size, DMA_BIDIRECTIONAL);
free_mr:
	kfree(mr);
	return NULL;
}
1209
dr_dereg_mr(struct mlx5_core_dev * mdev,struct mlx5dr_mr * mr)1210 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
1211 {
1212 mlx5_core_destroy_mkey(mdev, mr->mkey);
1213 dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
1214 DMA_BIDIRECTIONAL);
1215 kfree(mr);
1216 }
1217
mlx5dr_send_ring_alloc(struct mlx5dr_domain * dmn)1218 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
1219 {
1220 struct dr_qp_init_attr init_attr = {};
1221 int cq_size;
1222 int size;
1223 int ret;
1224
1225 dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
1226 if (!dmn->send_ring)
1227 return -ENOMEM;
1228
1229 cq_size = QUEUE_SIZE + 1;
1230 dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
1231 if (!dmn->send_ring->cq) {
1232 mlx5dr_err(dmn, "Failed creating CQ\n");
1233 ret = -ENOMEM;
1234 goto free_send_ring;
1235 }
1236
1237 init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
1238 init_attr.pdn = dmn->pdn;
1239 init_attr.uar = dmn->uar;
1240 init_attr.max_send_wr = QUEUE_SIZE;
1241
1242 /* Isolated VL is applicable only if force loopback is supported */
1243 if (dr_send_allow_fl(&dmn->info.caps))
1244 init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;
1245
1246 spin_lock_init(&dmn->send_ring->lock);
1247
1248 dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
1249 if (!dmn->send_ring->qp) {
1250 mlx5dr_err(dmn, "Failed creating QP\n");
1251 ret = -ENOMEM;
1252 goto clean_cq;
1253 }
1254
1255 dmn->send_ring->cq->qp = dmn->send_ring->qp;
1256
1257 dmn->info.max_send_wr = QUEUE_SIZE;
1258 dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
1259 DR_STE_SIZE);
1260
1261 dmn->send_ring->signal_th = dmn->info.max_send_wr /
1262 SIGNAL_PER_DIV_QUEUE;
1263
1264 /* Prepare qp to be used */
1265 ret = dr_prepare_qp_to_rts(dmn);
1266 if (ret)
1267 goto clean_qp;
1268
1269 dmn->send_ring->max_post_send_size =
1270 mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
1271 DR_ICM_TYPE_STE);
1272
1273 /* Allocating the max size as a buffer for writing */
1274 size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
1275 dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
1276 if (!dmn->send_ring->buf) {
1277 ret = -ENOMEM;
1278 goto clean_qp;
1279 }
1280
1281 dmn->send_ring->buf_size = size;
1282
1283 dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
1284 dmn->pdn, dmn->send_ring->buf, size);
1285 if (!dmn->send_ring->mr) {
1286 ret = -ENOMEM;
1287 goto free_mem;
1288 }
1289
1290 dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size,
1291 GFP_KERNEL);
1292 if (!dmn->send_ring->sync_buff) {
1293 ret = -ENOMEM;
1294 goto clean_mr;
1295 }
1296
1297 dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
1298 dmn->pdn, dmn->send_ring->sync_buff,
1299 dmn->send_ring->max_post_send_size);
1300 if (!dmn->send_ring->sync_mr) {
1301 ret = -ENOMEM;
1302 goto free_sync_mem;
1303 }
1304
1305 return 0;
1306
1307 free_sync_mem:
1308 kfree(dmn->send_ring->sync_buff);
1309 clean_mr:
1310 dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
1311 free_mem:
1312 kfree(dmn->send_ring->buf);
1313 clean_qp:
1314 dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
1315 clean_cq:
1316 dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
1317 free_send_ring:
1318 kfree(dmn->send_ring);
1319
1320 return ret;
1321 }
1322
mlx5dr_send_ring_free(struct mlx5dr_domain * dmn,struct mlx5dr_send_ring * send_ring)1323 void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
1324 struct mlx5dr_send_ring *send_ring)
1325 {
1326 dr_destroy_qp(dmn->mdev, send_ring->qp);
1327 dr_destroy_cq(dmn->mdev, send_ring->cq);
1328 dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
1329 dr_dereg_mr(dmn->mdev, send_ring->mr);
1330 kfree(send_ring->buf);
1331 kfree(send_ring->sync_buff);
1332 kfree(send_ring);
1333 }
1334
mlx5dr_send_ring_force_drain(struct mlx5dr_domain * dmn)1335 int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
1336 {
1337 struct mlx5dr_send_ring *send_ring = dmn->send_ring;
1338 struct postsend_info send_info = {};
1339 u8 data[DR_STE_SIZE];
1340 int num_of_sends_req;
1341 int ret;
1342 int i;
1343
1344 /* Sending this amount of requests makes sure we will get drain */
1345 num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
1346
1347 /* Send fake requests forcing the last to be signaled */
1348 send_info.write.addr = (uintptr_t)data;
1349 send_info.write.length = DR_STE_SIZE;
1350 send_info.write.lkey = 0;
1351 /* Using the sync_mr in order to write/read */
1352 send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
1353 send_info.rkey = send_ring->sync_mr->mkey;
1354
1355 for (i = 0; i < num_of_sends_req; i++) {
1356 ret = dr_postsend_icm_data(dmn, &send_info);
1357 if (ret)
1358 return ret;
1359 }
1360
1361 spin_lock(&send_ring->lock);
1362 ret = dr_handle_pending_wc(dmn, send_ring);
1363 spin_unlock(&send_ring->lock);
1364
1365 return ret;
1366 }
1367