// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include "erdma.h"

static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.dbrec = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}

static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.dbrec = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}

static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}

static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	sq->qbuf = dma_alloc_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
				      &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &sq->dbrec_dma);
	if (!sq->dbrec)
		goto err_out;

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, sq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
			  sq->qbuf, sq->qbuf_dma_addr);

	return -ENOMEM;
}

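/*
 * The completion queue is sized to match the submission queue
 * (cq->depth = cmdq->sq.depth), so every outstanding command has room
 * for its CQE. As with the SQ, the doorbell record (cq->dbrec) is a
 * host-memory shadow of the last doorbell value, and its DMA address
 * is reported to the device via ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG.
 */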
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;

	cq->depth = cmdq->sq.depth;
	cq->qbuf = dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
				      &cq->qbuf_dma_addr, GFP_KERNEL);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &cq->dbrec_dma);
	if (!cq->dbrec)
		goto err_out;

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, cq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, cq->qbuf,
			  cq->qbuf_dma_addr);

	return -ENOMEM;
}

static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;
	int ret;

	ret = erdma_eq_common_init(dev, eq, cmdq->max_outstandings);
	if (ret)
		return ret;

	eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG;

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, eq->dbrec_dma);

	return 0;
}

int erdma_cmdq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	int err;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);

	return err;
}

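/*
 * erdma_cmdq_init() deliberately starts the cmdq in polling mode
 * (use_event == false), so commands can be posted and reaped before
 * interrupts are usable; this helper flips it to event mode once
 * device initialization has completed.
 */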
void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* After device init succeeds, change the cmdq to event mode. */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	erdma_eq_destroy(dev, &cmdq->eq);

	dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);

	dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
}

/*
 * A CQE is new only when its owner bit differs from the wrap phase of
 * the consumer index; the phase bit (ci & depth) flips each time ci
 * wraps around the queue.
 */
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      be32_to_cpu(READ_ONCE(*cqe)));

	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}

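/*
 * Reap a single CQE: the SQE index carried in the CQE locates the
 * original SQE, whose context cookie identifies the waiting
 * erdma_comp_wait. The syndrome and the 16 bytes of completion data
 * that follow the CQE header are saved for the issuing context.
 */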
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = be32_to_cpu(*cqe);
	sqe_idx = be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;
	/* Copy the 16B of completion data after the CQE header to the
	 * waiting context.
	 */
	be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* There can be at most max_outstandings pending completions
	 * at any one time.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}

static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}

static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}
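
/*
 * Usage sketch (illustrative only; the request struct and constants
 * below stand in for whatever command a caller actually issues):
 * build the 64-bit command header first, then post synchronously.
 *
 *	struct erdma_cmdq_destroy_cq_req req;
 *
 *	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
 *				CMDQ_OPCODE_DESTROY_CQ);
 *	req.cqn = cqn;
 *	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req),
 *				  NULL, NULL);
 */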