/*
 * AMD Cryptographic Coprocessor (CCP) driver
 *
 * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 * Author: Gary R Hook <gary.hook@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/interrupt.h>
#include <linux/ccp.h>

#include "ccp-dev.h"

static u32 ccp_alloc_ksb(struct ccp_cmd_queue *cmd_q, unsigned int count)
{
	int start;
	struct ccp_device *ccp = cmd_q->ccp;

	for (;;) {
		mutex_lock(&ccp->sb_mutex);

		start = (u32)bitmap_find_next_zero_area(ccp->sb,
							ccp->sb_count,
							ccp->sb_start,
							count, 0);
		if (start <= ccp->sb_count) {
			bitmap_set(ccp->sb, start, count);

			mutex_unlock(&ccp->sb_mutex);
			break;
		}

		ccp->sb_avail = 0;

		mutex_unlock(&ccp->sb_mutex);

		/* Wait for KSB entries to become available */
		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
			return 0;
	}

	return KSB_START + start;
}

static void ccp_free_ksb(struct ccp_cmd_queue *cmd_q, unsigned int start,
			 unsigned int count)
{
	struct ccp_device *ccp = cmd_q->ccp;

	if (!start)
		return;

	mutex_lock(&ccp->sb_mutex);

	bitmap_clear(ccp->sb, start - KSB_START, count);

	ccp->sb_avail = 1;

	mutex_unlock(&ccp->sb_mutex);

	wake_up_interruptible_all(&ccp->sb_queue);
}

static unsigned int ccp_get_free_slots(struct ccp_cmd_queue *cmd_q)
{
	return CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
}

static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
{
	struct ccp_cmd_queue *cmd_q = op->cmd_q;
	struct ccp_device *ccp = cmd_q->ccp;
	void __iomem *cr_addr;
	u32 cr0, cmd;
	unsigned int i;
	int ret = 0;

	/* We could read a status register to see how many free slots
	 * are actually available, but reading that register resets it
	 * and you could lose some error information.
	 */
	cmd_q->free_slots--;

	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
	      | (op->jobid << REQ0_JOBID_SHIFT)
	      | REQ0_WAIT_FOR_WRITE;

	if (op->soc)
		cr0 |= REQ0_STOP_ON_COMPLETE
		       | REQ0_INT_ON_COMPLETE;

	if (op->ioc || !cmd_q->free_slots)
		cr0 |= REQ0_INT_ON_COMPLETE;

	/* Start at CMD_REQ1 */
	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;

	mutex_lock(&ccp->req_mutex);

	/* Write CMD_REQ1 through CMD_REQx first */
	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
		iowrite32(*(cr + i), cr_addr);

	/* Tell the CCP to start */
	wmb();
	iowrite32(cr0, ccp->io_regs + CMD_REQ0);

	mutex_unlock(&ccp->req_mutex);

	if (cr0 & REQ0_INT_ON_COMPLETE) {
		/* Wait for the job to complete */
		ret = wait_event_interruptible(cmd_q->int_queue,
					       cmd_q->int_rcvd);
		if (ret || cmd_q->cmd_error) {
			/* On error delete all related jobs from the queue */
			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;
			if (cmd_q->cmd_error)
				ccp_log_error(cmd_q->ccp,
					      cmd_q->cmd_error);

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);

			if (!ret)
				ret = -EIO;
		} else if (op->soc) {
			/* Delete just head job from the queue on SoC */
			cmd = DEL_Q_ACTIVE
			      | (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
		}

		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);

		cmd_q->int_rcvd = 0;
	}

	return ret;
}

static int ccp_perform_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
		| (op->sb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->u.aes.mode == CCP_AES_MODE_CFB)
		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_xts_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
		| (op->sb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_sha(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
		| REQ1_INIT;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);

	if (op->eom) {
		cr[0] |= REQ1_EOM;
		cr[4] = lower_32_bits(op->u.sha.msg_bits);
		cr[5] = upper_32_bits(op->u.sha.msg_bits);
	} else {
		cr[4] = 0;
		cr[5] = 0;
	}

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_rsa(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
		| (op->sb_key << REQ1_KEY_KSB_SHIFT)
		| REQ1_EOM;
	cr[1] = op->u.rsa.input_len - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_passthru(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);

	if (op->src.type == CCP_MEMTYPE_SYSTEM)
		cr[1] = op->src.u.dma.length - 1;
	else
		cr[1] = op->dst.u.dma.length - 1;

	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
		cr[2] = ccp_addr_lo(&op->src.u.dma);
		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->src.u.dma);

		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
			cr[3] |= (op->sb_key << REQ4_KSB_SHIFT);
	} else {
		cr[2] = op->src.u.sb * CCP_SB_BYTES;
		cr[3] = (CCP_MEMTYPE_SB << REQ4_MEMTYPE_SHIFT);
	}

	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
		cr[4] = ccp_addr_lo(&op->dst.u.dma);
		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->dst.u.dma);
	} else {
		cr[4] = op->dst.u.sb * CCP_SB_BYTES;
		cr[5] = (CCP_MEMTYPE_SB << REQ6_MEMTYPE_SHIFT);
	}

	if (op->eom)
		cr[0] |= REQ1_EOM;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_ecc(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = REQ1_ECC_AFFINE_CONVERT
		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
		| REQ1_EOM;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_init(struct ccp_device *ccp)
{
	struct device *dev = ccp->dev;
	struct ccp_cmd_queue *cmd_q;
	struct dma_pool *dma_pool;
	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
	unsigned int qmr, qim, i;
	int ret;

	/* Find available queues */
	qim = 0;
	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
	for (i = 0; i < MAX_HW_QUEUES; i++) {
		if (!(qmr & (1 << i)))
			continue;

		/* Allocate a dma pool for this queue */
		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
			 ccp->name, i);
		dma_pool = dma_pool_create(dma_pool_name, dev,
					   CCP_DMAPOOL_MAX_SIZE,
					   CCP_DMAPOOL_ALIGN, 0);
		if (!dma_pool) {
			dev_err(dev, "unable to allocate dma pool\n");
			ret = -ENOMEM;
			goto e_pool;
		}

		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
		ccp->cmd_q_count++;

		cmd_q->ccp = ccp;
		cmd_q->id = i;
		cmd_q->dma_pool = dma_pool;

		/* Reserve 2 KSB regions for the queue */
		cmd_q->sb_key = KSB_START + ccp->sb_start++;
		cmd_q->sb_ctx = KSB_START + ccp->sb_start++;
		ccp->sb_count -= 2;

		/* Preset some register values and masks that are queue
		 * number dependent
		 */
		cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
				    (CMD_Q_STATUS_INCR * i);
		cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
					(CMD_Q_STATUS_INCR * i);
		cmd_q->int_ok = 1 << (i * 2);
		cmd_q->int_err = 1 << ((i * 2) + 1);

		cmd_q->free_slots = ccp_get_free_slots(cmd_q);

		init_waitqueue_head(&cmd_q->int_queue);

		/* Build queue interrupt mask (two interrupts per queue) */
		qim |= cmd_q->int_ok | cmd_q->int_err;

#ifdef CONFIG_ARM64
		/* For arm64 set the recommended queue cache settings */
		iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE +
			  (CMD_Q_CACHE_INC * i));
#endif

		dev_dbg(dev, "queue #%u available\n", i);
	}
	if (ccp->cmd_q_count == 0) {
		dev_notice(dev, "no command queues available\n");
		ret = -EIO;
		goto e_pool;
	}
	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);

	/* Disable and clear interrupts until ready */
	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);
	}
	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);

	/* Request an irq */
	ret = ccp->get_irq(ccp);
	if (ret) {
		dev_err(dev, "unable to allocate an IRQ\n");
		goto e_pool;
	}

	/* Initialize the queues used to wait for KSB space and suspend */
	init_waitqueue_head(&ccp->sb_queue);
	init_waitqueue_head(&ccp->suspend_queue);

	dev_dbg(dev, "Starting threads...\n");
	/* Create a kthread for each queue */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct task_struct *kthread;

		cmd_q = &ccp->cmd_q[i];

		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
					 "%s-q%u", ccp->name, cmd_q->id);
		if (IS_ERR(kthread)) {
			dev_err(dev, "error creating queue thread (%ld)\n",
				PTR_ERR(kthread));
			ret = PTR_ERR(kthread);
			goto e_kthread;
		}

		cmd_q->kthread = kthread;
		wake_up_process(kthread);
	}

	dev_dbg(dev, "Enabling interrupts...\n");
	/* Enable interrupts */
	iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);

	dev_dbg(dev, "Registering device...\n");
	ccp_add_device(ccp);

	ret = ccp_register_rng(ccp);
	if (ret)
		goto e_kthread;

	/* Register the DMA engine support */
	ret = ccp_dmaengine_register(ccp);
	if (ret)
		goto e_hwrng;

	return 0;

e_hwrng:
	ccp_unregister_rng(ccp);

e_kthread:
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

	ccp->free_irq(ccp);

e_pool:
	for (i = 0; i < ccp->cmd_q_count; i++)
		dma_pool_destroy(ccp->cmd_q[i].dma_pool);

	return ret;
}

static void ccp_destroy(struct ccp_device *ccp)
{
	struct ccp_cmd_queue *cmd_q;
	struct ccp_cmd *cmd;
	unsigned int qim, i;

	/* Unregister the DMA engine */
	ccp_dmaengine_unregister(ccp);

	/* Unregister the RNG */
	ccp_unregister_rng(ccp);

	/* Remove this device from the list of available units */
	ccp_del_device(ccp);

	/* Build queue interrupt mask (two interrupt masks per queue) */
	qim = 0;
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];
		qim |= cmd_q->int_ok | cmd_q->int_err;
	}

	/* Disable and clear interrupts */
	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);
	}
	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);

	/* Stop the queue kthreads */
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

	ccp->free_irq(ccp);

	for (i = 0; i < ccp->cmd_q_count; i++)
		dma_pool_destroy(ccp->cmd_q[i].dma_pool);

	/* Flush the cmd and backlog queue */
	while (!list_empty(&ccp->cmd)) {
		/* Invoke the callback directly with an error code */
		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
		list_del(&cmd->entry);
		cmd->callback(cmd->data, -ENODEV);
	}
	while (!list_empty(&ccp->backlog)) {
		/* Invoke the callback directly with an error code */
		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
		list_del(&cmd->entry);
		cmd->callback(cmd->data, -ENODEV);
	}
}

static irqreturn_t ccp_irq_handler(int irq, void *data)
{
	struct device *dev = data;
	struct ccp_device *ccp = dev_get_drvdata(dev);
	struct ccp_cmd_queue *cmd_q;
	u32 q_int, status;
	unsigned int i;

	status = ioread32(ccp->io_regs + IRQ_STATUS_REG);

	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		q_int = status & (cmd_q->int_ok | cmd_q->int_err);
		if (q_int) {
			cmd_q->int_status = status;
			cmd_q->q_status = ioread32(cmd_q->reg_status);
			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);

			/* On error, only save the first error value */
			if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);

			cmd_q->int_rcvd = 1;

			/* Acknowledge the interrupt and wake the kthread */
			iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
			wake_up_interruptible(&cmd_q->int_queue);
		}
	}

	return IRQ_HANDLED;
}

static const struct ccp_actions ccp3_actions = {
	.aes = ccp_perform_aes,
	.xts_aes = ccp_perform_xts_aes,
	.sha = ccp_perform_sha,
	.rsa = ccp_perform_rsa,
	.passthru = ccp_perform_passthru,
	.ecc = ccp_perform_ecc,
	.sballoc = ccp_alloc_ksb,
	.sbfree = ccp_free_ksb,
	.init = ccp_init,
	.destroy = ccp_destroy,
	.get_free_slots = ccp_get_free_slots,
	.irqhandler = ccp_irq_handler,
};

const struct ccp_vdata ccpv3 = {
	.version = CCP_VERSION(3, 0),
	.setup = NULL,
	.perform = &ccp3_actions,
	.bar = 2,
	.offset = 0x20000,
};
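
/*
 * Illustrative sketch (not part of this driver): callers never invoke the
 * ccp_perform_*() functions above directly; they reach them through the
 * ccp3_actions table published via ccpv3.perform. A minimal, hedged example
 * of such a dispatch is shown below. The helper name example_submit_aes()
 * is hypothetical and exists only for illustration; it assumes the caller
 * (e.g. the command path in ccp-ops.c) has already populated op->cmd_q,
 * op->u.aes.* and the op->src/op->dst DMA descriptors.
 *
 *	static int example_submit_aes(struct ccp_op *op)
 *	{
 *		struct ccp_cmd_queue *cmd_q = op->cmd_q;
 *
 *		// Dispatch through the version-specific action table;
 *		// for a v3 device this resolves to ccp_perform_aes().
 *		return cmd_q->ccp->vdata->perform->aes(op);
 *	}
 */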