// SPDX-License-Identifier: GPL-2.0
/* Marvell OcteonTX CPT driver
 *
 * Copyright (C) 2019 Marvell International Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include "otx_cptvf.h"
#include "otx_cptvf_algs.h"

/* Completion code size and initial value */
#define COMPLETION_CODE_SIZE    8
#define COMPLETION_CODE_INIT    0

/* SG list header size in bytes */
#define SG_LIST_HDR_SIZE        8

/* Default timeout when waiting for free pending entry in us */
#define CPT_PENTRY_TIMEOUT      1000
#define CPT_PENTRY_STEP         50

/* Default threshold for stopping and resuming sender requests */
#define CPT_IQ_STOP_MARGIN      128
#define CPT_IQ_RESUME_MARGIN    512

#define CPT_DMA_ALIGN           128

void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req)
{
        int i;

        pr_debug("Gather list size %d\n", req->incnt);
        for (i = 0; i < req->incnt; i++) {
                pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
                         req->in[i].size, req->in[i].vptr,
                         (void *) req->in[i].dma_addr);
                pr_debug("Buffer hexdump (%d bytes)\n",
                         req->in[i].size);
                print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1,
                                     req->in[i].vptr, req->in[i].size, false);
        }

        pr_debug("Scatter list size %d\n", req->outcnt);
        for (i = 0; i < req->outcnt; i++) {
                pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
                         req->out[i].size, req->out[i].vptr,
                         (void *) req->out[i].dma_addr);
                pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size);
                print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1,
                                     req->out[i].vptr, req->out[i].size, false);
        }
}

static inline struct otx_cpt_pending_entry *get_free_pending_entry(
                                        struct otx_cpt_pending_queue *q,
                                        int qlen)
{
        struct otx_cpt_pending_entry *ent = NULL;

        ent = &q->head[q->rear];
        if (unlikely(ent->busy))
                return NULL;

        q->rear++;
        if (unlikely(q->rear == qlen))
                q->rear = 0;

        return ent;
}

static inline u32 modulo_inc(u32 index, u32 length, u32 inc)
{
        if (WARN_ON(inc > length))
                inc = length;

        index += inc;
        if (unlikely(index >= length))
                index -= length;

        return index;
}

static inline void free_pentry(struct otx_cpt_pending_entry *pentry)
{
        pentry->completion_addr = NULL;
        pentry->info = NULL;
        pentry->callback = NULL;
        pentry->areq = NULL;
        pentry->resume_sender = false;
        pentry->busy = false;
}

static inline int setup_sgio_components(struct pci_dev *pdev,
                                        struct otx_cpt_buf_ptr *list,
                                        int buf_count, u8 *buffer)
{
        struct otx_cpt_sglist_component *sg_ptr = NULL;
        int ret = 0, i, j;
        int components;

        if (unlikely(!list)) {
                dev_err(&pdev->dev, "Input list pointer is NULL\n");
                return -EFAULT;
        }

        for (i = 0; i < buf_count; i++) {
                if (likely(list[i].vptr)) {
                        list[i].dma_addr = dma_map_single(&pdev->dev,
                                                          list[i].vptr,
                                                          list[i].size,
                                                          DMA_BIDIRECTIONAL);
                        if (unlikely(dma_mapping_error(&pdev->dev,
                                                       list[i].dma_addr))) {
                                dev_err(&pdev->dev, "Dma mapping failed\n");
                                ret = -EIO;
                                goto sg_cleanup;
                        }
                }
        }

        components = buf_count / 4;
        sg_ptr = (struct otx_cpt_sglist_component *)buffer;
        for (i = 0; i < components; i++) {
                sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
                sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
                sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
                sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size);
                sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
                sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
                sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
                sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr);
                sg_ptr++;
        }
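
        /*
         * Pack the remaining 1-3 buffers, if any, into one last, partially
         * filled scatter-gather component.
         */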
        components = buf_count % 4;

        switch (components) {
        case 3:
                sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
                sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
                /* Fall through */
        case 2:
                sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
                sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
                /* Fall through */
        case 1:
                sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
                sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
                break;
        default:
                break;
        }
        return ret;

sg_cleanup:
        for (j = 0; j < i; j++) {
                if (list[j].dma_addr) {
                        dma_unmap_single(&pdev->dev, list[j].dma_addr,
                                         list[j].size, DMA_BIDIRECTIONAL);
                }

                list[j].dma_addr = 0;
        }
        return ret;
}
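
/*
 * Build the DMA view of a request: allocate a single buffer holding the info
 * structure, the 8 byte SG list header plus the gather and scatter component
 * lists (DPTR), the union otx_cpt_res_s completion structure and the
 * microcode completion code (RPTR), then DMA map everything that follows the
 * info structure.
 */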
static inline int setup_sgio_list(struct pci_dev *pdev,
                                  struct otx_cpt_info_buffer **pinfo,
                                  struct otx_cpt_req_info *req, gfp_t gfp)
{
        u32 dlen, align_dlen, info_len, rlen;
        struct otx_cpt_info_buffer *info;
        u16 g_sz_bytes, s_sz_bytes;
        int align = CPT_DMA_ALIGN;
        u32 total_mem_len;

        if (unlikely(req->incnt > OTX_CPT_MAX_SG_IN_CNT ||
                     req->outcnt > OTX_CPT_MAX_SG_OUT_CNT)) {
                dev_err(&pdev->dev, "Error too many sg components\n");
                return -EINVAL;
        }

        g_sz_bytes = ((req->incnt + 3) / 4) *
                      sizeof(struct otx_cpt_sglist_component);
        s_sz_bytes = ((req->outcnt + 3) / 4) *
                      sizeof(struct otx_cpt_sglist_component);

        dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
        align_dlen = ALIGN(dlen, align);
        info_len = ALIGN(sizeof(*info), align);
        rlen = ALIGN(sizeof(union otx_cpt_res_s), align);
        total_mem_len = align_dlen + info_len + rlen + COMPLETION_CODE_SIZE;

        info = kzalloc(total_mem_len, gfp);
        if (unlikely(!info)) {
                dev_err(&pdev->dev, "Memory allocation failed\n");
                return -ENOMEM;
        }
        *pinfo = info;
        info->dlen = dlen;
        info->in_buffer = (u8 *)info + info_len;

        ((u16 *)info->in_buffer)[0] = req->outcnt;
        ((u16 *)info->in_buffer)[1] = req->incnt;
        ((u16 *)info->in_buffer)[2] = 0;
        ((u16 *)info->in_buffer)[3] = 0;
        *(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer);

        /* Setup gather (input) components */
        if (setup_sgio_components(pdev, req->in, req->incnt,
                                  &info->in_buffer[8])) {
                dev_err(&pdev->dev, "Failed to setup gather list\n");
                return -EFAULT;
        }

        if (setup_sgio_components(pdev, req->out, req->outcnt,
                                  &info->in_buffer[8 + g_sz_bytes])) {
                dev_err(&pdev->dev, "Failed to setup scatter list\n");
                return -EFAULT;
        }

        info->dma_len = total_mem_len - info_len;
        info->dptr_baddr = dma_map_single(&pdev->dev, (void *)info->in_buffer,
                                          info->dma_len, DMA_BIDIRECTIONAL);
        if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
                dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n");
                return -EIO;
        }
        /*
         * Get buffer for union otx_cpt_res_s response
         * structure and its physical address
         */
        info->completion_addr = (u64 *)(info->in_buffer + align_dlen);
        info->comp_baddr = info->dptr_baddr + align_dlen;

        /* Create and initialize RPTR */
        info->out_buffer = (u8 *)info->completion_addr + rlen;
        info->rptr_baddr = info->comp_baddr + rlen;

        *((u64 *) info->out_buffer) = ~((u64) COMPLETION_CODE_INIT);

        return 0;
}

static void cpt_fill_inst(union otx_cpt_inst_s *inst,
                          struct otx_cpt_info_buffer *info,
                          struct otx_cpt_iq_cmd *cmd)
{
        inst->u[0] = 0x0;
        inst->s.doneint = true;
        inst->s.res_addr = (u64)info->comp_baddr;
        inst->u[2] = 0x0;
        inst->s.wq_ptr = 0;
        inst->s.ei0 = cmd->cmd.u64;
        inst->s.ei1 = cmd->dptr;
        inst->s.ei2 = cmd->rptr;
        inst->s.ei3 = cmd->cptr.u64;
}

/*
 * On the OcteonTX platform the parameter db_count is used as the count for
 * ringing the doorbell. The valid values for db_count are:
 * 0 - 1 CPT instruction will be enqueued, however CPT will not be informed
 * 1 - 1 CPT instruction will be enqueued and CPT will be informed
 */
static void cpt_send_cmd(union otx_cpt_inst_s *cptinst, struct otx_cptvf *cptvf)
{
        struct otx_cpt_cmd_qinfo *qinfo = &cptvf->cqinfo;
        struct otx_cpt_cmd_queue *queue;
        struct otx_cpt_cmd_chunk *curr;
        u8 *ent;

        queue = &qinfo->queue[0];
        /*
         * cpt_send_cmd is currently called only from a critical section,
         * therefore no locking is required for accessing the instruction queue
         */
        ent = &queue->qhead->head[queue->idx * OTX_CPT_INST_SIZE];
        memcpy(ent, (void *) cptinst, OTX_CPT_INST_SIZE);

        if (++queue->idx >= queue->qhead->size / 64) {
                curr = queue->qhead;

                if (list_is_last(&curr->nextchunk, &queue->chead))
                        queue->qhead = queue->base;
                else
                        queue->qhead = list_next_entry(queue->qhead, nextchunk);
                queue->idx = 0;
        }
        /* Make sure all memory stores are done before ringing the doorbell */
        smp_wmb();
        otx_cptvf_write_vq_doorbell(cptvf, 1);
}
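
/*
 * Prepare and submit one CPT request: build the SG lists, reserve a pending
 * queue entry, fill in the CPT_INST_S instruction and ring the doorbell.
 * Returns -EINPROGRESS on success, or -EBUSY when the pending queue is close
 * to full and the sender is expected to back off until it is resumed from the
 * completion path.
 */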
static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
                           struct otx_cpt_pending_queue *pqueue,
                           struct otx_cptvf *cptvf)
{
        struct otx_cptvf_request *cpt_req = &req->req;
        struct otx_cpt_pending_entry *pentry = NULL;
        union otx_cpt_ctrl_info *ctrl = &req->ctrl;
        struct otx_cpt_info_buffer *info = NULL;
        union otx_cpt_res_s *result = NULL;
        struct otx_cpt_iq_cmd iq_cmd;
        union otx_cpt_inst_s cptinst;
        int retry, ret = 0;
        u8 resume_sender;
        gfp_t gfp;

        gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL :
                                                              GFP_ATOMIC;
        ret = setup_sgio_list(pdev, &info, req, gfp);
        if (unlikely(ret)) {
                dev_err(&pdev->dev, "Setting up SG list failed\n");
                goto request_cleanup;
        }
        cpt_req->dlen = info->dlen;

        result = (union otx_cpt_res_s *) info->completion_addr;
        result->s.compcode = COMPLETION_CODE_INIT;

        spin_lock_bh(&pqueue->lock);
        pentry = get_free_pending_entry(pqueue, pqueue->qlen);
        retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP;
        while (unlikely(!pentry) && retry--) {
                spin_unlock_bh(&pqueue->lock);
                udelay(CPT_PENTRY_STEP);
                spin_lock_bh(&pqueue->lock);
                pentry = get_free_pending_entry(pqueue, pqueue->qlen);
        }

        if (unlikely(!pentry)) {
                ret = -ENOSPC;
                spin_unlock_bh(&pqueue->lock);
                goto request_cleanup;
        }

        /*
         * Check if we are close to filling the entire pending queue;
         * if so then tell the sender to stop/sleep by returning -EBUSY.
         * We do it only for contexts which can sleep (GFP_KERNEL).
         */
        if (gfp == GFP_KERNEL &&
            pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN))
                pentry->resume_sender = true;
        else
                pentry->resume_sender = false;
        resume_sender = pentry->resume_sender;
        pqueue->pending_count++;

        pentry->completion_addr = info->completion_addr;
        pentry->info = info;
        pentry->callback = req->callback;
        pentry->areq = req->areq;
        pentry->busy = true;
        info->pentry = pentry;
        info->time_in = jiffies;
        info->req = req;

        /* Fill in the command */
        iq_cmd.cmd.u64 = 0;
        iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags);
        iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1);
        iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2);
        iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen);

        /* 64-bit swap for microcode data reads, not needed for addresses */
        iq_cmd.cmd.u64 = cpu_to_be64(iq_cmd.cmd.u64);
        iq_cmd.dptr = info->dptr_baddr;
        iq_cmd.rptr = info->rptr_baddr;
        iq_cmd.cptr.u64 = 0;
        iq_cmd.cptr.s.grp = ctrl->s.grp;

        /* Fill in the CPT_INST_S type command for HW interpretation */
        cpt_fill_inst(&cptinst, info, &iq_cmd);

        /* Print debug info if enabled */
        otx_cpt_dump_sg_list(pdev, req);
        pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX_CPT_INST_SIZE);
        print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, &cptinst,
                             OTX_CPT_INST_SIZE, false);
        pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen);
        print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, info->in_buffer,
                             cpt_req->dlen, false);

        /* Send CPT command */
        cpt_send_cmd(&cptinst, cptvf);

        /*
         * We allocate and prepare the pending queue entry in a critical
         * section together with submitting the CPT instruction to the CPT
         * instruction queue, to make sure that the order of CPT requests is
         * the same in both the pending and instruction queues.
         */
        spin_unlock_bh(&pqueue->lock);

        ret = resume_sender ? -EBUSY : -EINPROGRESS;
        return ret;

request_cleanup:
        do_request_cleanup(pdev, info);
        return ret;
}

int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
                       int cpu_num)
{
        struct otx_cptvf *cptvf = pci_get_drvdata(pdev);

        if (!otx_cpt_device_ready(cptvf)) {
                dev_err(&pdev->dev, "CPT Device is not ready\n");
                return -ENODEV;
        }

        if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) {
                dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request\n",
                        cptvf->vfid);
                return -EINVAL;
        } else if ((cptvf->vftype == OTX_CPT_AE_TYPES) &&
                   (req->ctrl.s.se_req)) {
                dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request\n",
                        cptvf->vfid);
                return -EINVAL;
        }

        return process_request(pdev, req, &cptvf->pqinfo.queue[0], cptvf);
}
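
/*
 * Translate the completion code written by the CPT engine into a result for
 * the caller. Returns 1 while the completion code has not been updated yet
 * (a warning is printed if the request has timed out), and 0 once the request
 * can be completed; *res_code is set to 0 only when the request completed
 * successfully.
 */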
static int cpt_process_ccode(struct pci_dev *pdev,
                             union otx_cpt_res_s *cpt_status,
                             struct otx_cpt_info_buffer *cpt_info,
                             struct otx_cpt_req_info *req, u32 *res_code)
{
        u8 ccode = cpt_status->s.compcode;
        union otx_cpt_error_code ecode;

        ecode.u = be64_to_cpu(*((u64 *) cpt_info->out_buffer));
        switch (ccode) {
        case CPT_COMP_E_FAULT:
                dev_err(&pdev->dev,
                        "Request failed with DMA fault\n");
                otx_cpt_dump_sg_list(pdev, req);
                break;

        case CPT_COMP_E_SWERR:
                dev_err(&pdev->dev,
                        "Request failed with software error code %d\n",
                        ecode.s.ccode);
                otx_cpt_dump_sg_list(pdev, req);
                break;

        case CPT_COMP_E_HWERR:
                dev_err(&pdev->dev,
                        "Request failed with hardware error\n");
                otx_cpt_dump_sg_list(pdev, req);
                break;

        case COMPLETION_CODE_INIT:
                /* check for timeout */
                if (time_after_eq(jiffies, cpt_info->time_in +
                                  OTX_CPT_COMMAND_TIMEOUT * HZ))
                        dev_warn(&pdev->dev, "Request timed out 0x%p\n", req);
                else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) {
                        cpt_info->time_in = jiffies;
                        cpt_info->extra_time++;
                }
                return 1;

        case CPT_COMP_E_GOOD:
                /* Check microcode completion code */
                if (ecode.s.ccode) {
                        /*
                         * If requested hmac is truncated and ucode returns
                         * s/g write length error then we report success
                         * because ucode writes as many bytes of calculated
                         * hmac as available in gather buffer and reports
                         * s/g write length error if number of bytes in gather
                         * buffer is less than full hmac size.
                         */
                        if (req->is_trunc_hmac &&
                            ecode.s.ccode == ERR_SCATTER_GATHER_WRITE_LENGTH) {
                                *res_code = 0;
                                break;
                        }

                        dev_err(&pdev->dev,
                                "Request failed with software error code 0x%x\n",
                                ecode.s.ccode);
                        otx_cpt_dump_sg_list(pdev, req);
                        break;
                }

                /* Request has been processed with success */
                *res_code = 0;
                break;

        default:
                dev_err(&pdev->dev, "Request returned invalid status\n");
                break;
        }

        return 0;
}
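
/*
 * Walk the pending queue in submission order and complete finished requests.
 * Processing stops at the first entry whose completion code has not been
 * updated yet. Senders which were told to back off are resumed
 * CPT_IQ_RESUME_MARGIN entries before the queue drains by invoking their
 * callback with -EINPROGRESS.
 */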
static inline void process_pending_queue(struct pci_dev *pdev,
                                         struct otx_cpt_pending_queue *pqueue)
{
        void (*callback)(int status, void *arg1, void *arg2);
        struct otx_cpt_pending_entry *resume_pentry = NULL;
        struct otx_cpt_pending_entry *pentry = NULL;
        struct otx_cpt_info_buffer *cpt_info = NULL;
        union otx_cpt_res_s *cpt_status = NULL;
        struct otx_cpt_req_info *req = NULL;
        struct crypto_async_request *areq;
        u32 res_code, resume_index;

        while (1) {
                spin_lock_bh(&pqueue->lock);
                pentry = &pqueue->head[pqueue->front];

                if (WARN_ON(!pentry)) {
                        spin_unlock_bh(&pqueue->lock);
                        break;
                }

                res_code = -EINVAL;
                if (unlikely(!pentry->busy)) {
                        spin_unlock_bh(&pqueue->lock);
                        break;
                }

                if (unlikely(!pentry->callback)) {
                        dev_err(&pdev->dev, "Callback NULL\n");
                        goto process_pentry;
                }

                cpt_info = pentry->info;
                if (unlikely(!cpt_info)) {
                        dev_err(&pdev->dev, "Pending entry post arg NULL\n");
                        goto process_pentry;
                }

                req = cpt_info->req;
                if (unlikely(!req)) {
                        dev_err(&pdev->dev, "Request NULL\n");
                        goto process_pentry;
                }

                cpt_status = (union otx_cpt_res_s *) pentry->completion_addr;
                if (unlikely(!cpt_status)) {
                        dev_err(&pdev->dev, "Completion address NULL\n");
                        goto process_pentry;
                }

                if (cpt_process_ccode(pdev, cpt_status, cpt_info, req,
                                      &res_code)) {
                        spin_unlock_bh(&pqueue->lock);
                        return;
                }
                cpt_info->pdev = pdev;

process_pentry:
                /*
                 * Check if we should inform the sending side to resume.
                 * We do it CPT_IQ_RESUME_MARGIN elements in advance, before
                 * the pending queue becomes empty.
                 */
                resume_index = modulo_inc(pqueue->front, pqueue->qlen,
                                          CPT_IQ_RESUME_MARGIN);
                resume_pentry = &pqueue->head[resume_index];
                if (resume_pentry &&
                    resume_pentry->resume_sender) {
                        resume_pentry->resume_sender = false;
                        callback = resume_pentry->callback;
                        areq = resume_pentry->areq;

                        if (callback) {
                                spin_unlock_bh(&pqueue->lock);

                                /*
                                 * EINPROGRESS is an indication for the sending
                                 * side that it can resume sending requests.
                                 */
                                callback(-EINPROGRESS, areq, cpt_info);
                                spin_lock_bh(&pqueue->lock);
                        }
                }

                callback = pentry->callback;
                areq = pentry->areq;
                free_pentry(pentry);

                pqueue->pending_count--;
                pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1);
                spin_unlock_bh(&pqueue->lock);

                /*
                 * Call the callback after the current pending entry has been
                 * processed; we don't do it if the callback pointer is
                 * invalid.
                 */
                if (callback)
                        callback(res_code, areq, cpt_info);
        }
}

void otx_cpt_post_process(struct otx_cptvf_wqe *wqe)
{
        process_pending_queue(wqe->cptvf->pdev, &wqe->cptvf->pqinfo.queue[0]);
}