// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include "erdma.h"

static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.dbrec = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}

static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.dbrec = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}

static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}

static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	sq->qbuf = dma_alloc_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
				      &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &sq->dbrec_dma);
	if (!sq->dbrec)
		goto err_out;

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, sq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
			  sq->qbuf, sq->qbuf_dma_addr);

	return -ENOMEM;
}

static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;

	cq->depth = cmdq->sq.depth;
	cq->qbuf = dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
				      &cq->qbuf_dma_addr, GFP_KERNEL);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &cq->dbrec_dma);
	if (!cq->dbrec)
		goto err_out;

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, cq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, cq->qbuf,
			  cq->qbuf_dma_addr);

	return -ENOMEM;
}

static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;

	eq->depth = cmdq->max_outstandings;
	eq->qbuf = dma_alloc_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT,
				      &eq->qbuf_dma_addr, GFP_KERNEL);
	if (!eq->qbuf)
		return -ENOMEM;

	spin_lock_init(&eq->lock);
	atomic64_set(&eq->event_num, 0);

	eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG;
	eq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &eq->dbrec_dma);
	if (!eq->dbrec)
		goto err_out;

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, eq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, eq->qbuf,
			  eq->qbuf_dma_addr);

	return -ENOMEM;
}

int erdma_cmdq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	int err;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);

	return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* After device init succeeds, switch the cmdq to event mode. */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	dma_free_coherent(&dev->pdev->dev, cmdq->eq.depth << EQE_SHIFT,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->eq.dbrec, cmdq->eq.dbrec_dma);

	dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);

	dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
}

static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      be32_to_cpu(READ_ONCE(*cqe)));

	/* A new CQE is detected by comparing its owner bit against the
	 * wrap bit of the consumer index.
	 */
	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	/* Rewrite the first 8 bytes of the WQE with the completed header:
	 * updated PI, completion-wait cookie and WQEBB count.
	 */
	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}

static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = be32_to_cpu(*cqe);
	sqe_idx = be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;
	/* Copy the 16 bytes of completion data after the CQE header to the waiter. */
	be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* At most max_outstandings completions can be pending at one time. */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}

static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}

static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}
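
/*
 * Typical caller usage (illustrative sketch only, not part of the cmdq
 * core): build the command header with erdma_cmdq_build_reqhdr(), fill in
 * the command-specific fields, then post the request synchronously with
 * erdma_post_cmd_wait().  The request structure and the CMDQ_SUBMOD_RDMA /
 * CMDQ_OPCODE_DESTROY_CQ names below are assumed to be provided by
 * erdma_hw.h.
 *
 *	struct erdma_cmdq_destroy_cq_req req;
 *	int err;
 *
 *	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
 *				CMDQ_OPCODE_DESTROY_CQ);
 *	req.cqn = cqn;
 *
 *	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
 */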