/*
 * AMD Cryptographic Coprocessor (CCP) driver
 *
 * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/interrupt.h>
#include <linux/ccp.h>

#include "ccp-dev.h"

static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
{
        struct ccp_cmd_queue *cmd_q = op->cmd_q;
        struct ccp_device *ccp = cmd_q->ccp;
        void __iomem *cr_addr;
        u32 cr0, cmd;
        unsigned int i;
        int ret = 0;

        /* We could read a status register to see how many free slots
         * are actually available, but reading that register resets it
         * and you could lose some error information.
         */
        cmd_q->free_slots--;

        cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
              | (op->jobid << REQ0_JOBID_SHIFT)
              | REQ0_WAIT_FOR_WRITE;

        if (op->soc)
                cr0 |= REQ0_STOP_ON_COMPLETE
                       | REQ0_INT_ON_COMPLETE;

        if (op->ioc || !cmd_q->free_slots)
                cr0 |= REQ0_INT_ON_COMPLETE;

        /* Start at CMD_REQ1 */
        cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;

        mutex_lock(&ccp->req_mutex);

        /* Write CMD_REQ1 through CMD_REQx first */
        for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
                iowrite32(*(cr + i), cr_addr);

        /* Tell the CCP to start */
        wmb();
        iowrite32(cr0, ccp->io_regs + CMD_REQ0);

        mutex_unlock(&ccp->req_mutex);

        if (cr0 & REQ0_INT_ON_COMPLETE) {
                /* Wait for the job to complete */
                ret = wait_event_interruptible(cmd_q->int_queue,
                                               cmd_q->int_rcvd);
                if (ret || cmd_q->cmd_error) {
                        /* On error delete all related jobs from the queue */
                        cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
                              | op->jobid;

                        iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);

                        if (!ret)
                                ret = -EIO;
                } else if (op->soc) {
                        /* Delete just head job from the queue on SoC */
                        cmd = DEL_Q_ACTIVE
                              | (cmd_q->id << DEL_Q_ID_SHIFT)
                              | op->jobid;

                        iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
                }

                cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);

                cmd_q->int_rcvd = 0;
        }

        return ret;
}

static int ccp_perform_aes(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
                | (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
                | (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
                | (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
                | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
        cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->dst.u.dma);

        if (op->u.aes.mode == CCP_AES_MODE_CFB)
                cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);

        if (op->eom)
                cr[0] |= REQ1_EOM;

        if (op->init)
                cr[0] |= REQ1_INIT;

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
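
/* The remaining ccp_perform_*() helpers follow the same pattern as
 * ccp_perform_aes() above: fill cr[0] through cr[5] with the REQ1-REQ6
 * contents for the engine (operation type, KSB key/context slots, and
 * the source/destination addresses and memory types), then hand the
 * array to ccp_do_cmd(), which writes it to CMD_REQ1-CMD_REQx and
 * starts the job by writing CMD_REQ0.
 */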

static int ccp_perform_xts_aes(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
                | (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
                | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
                | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
        cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->dst.u.dma);

        if (op->eom)
                cr[0] |= REQ1_EOM;

        if (op->init)
                cr[0] |= REQ1_INIT;

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_sha(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
                | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
                | REQ1_INIT;
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);

        if (op->eom) {
                cr[0] |= REQ1_EOM;
                cr[4] = lower_32_bits(op->u.sha.msg_bits);
                cr[5] = upper_32_bits(op->u.sha.msg_bits);
        } else {
                cr[4] = 0;
                cr[5] = 0;
        }

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_rsa(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
                | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
                | (op->ksb_key << REQ1_KEY_KSB_SHIFT)
                | REQ1_EOM;
        cr[1] = op->u.rsa.input_len - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
        cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->dst.u.dma);

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_passthru(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
                | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
                | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);

        if (op->src.type == CCP_MEMTYPE_SYSTEM)
                cr[1] = op->src.u.dma.length - 1;
        else
                cr[1] = op->dst.u.dma.length - 1;

        if (op->src.type == CCP_MEMTYPE_SYSTEM) {
                cr[2] = ccp_addr_lo(&op->src.u.dma);
                cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                        | ccp_addr_hi(&op->src.u.dma);

                if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
                        cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
        } else {
                cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
                cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
        }

        if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
                cr[4] = ccp_addr_lo(&op->dst.u.dma);
                cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                        | ccp_addr_hi(&op->dst.u.dma);
        } else {
                cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
                cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
        }

        if (op->eom)
                cr[0] |= REQ1_EOM;

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_ecc(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = REQ1_ECC_AFFINE_CONVERT
                | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
                | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
                | REQ1_EOM;
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
        cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->dst.u.dma);

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
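
/* ccp_trng_read() backs the hwrng device registered in ccp_init() below.
 * The hwrng core provides the locking around calls into this routine, so
 * the only state kept here is the zero-read retry counter in ccp_device.
 */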

static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
        struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
        u32 trng_value;
        int len = min_t(int, sizeof(trng_value), max);

        /*
         * Locking is provided by the caller so we can update device
         * hwrng-related fields safely
         */
        trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
        if (!trng_value) {
                /* Zero is returned if no data is available or if a
                 * bad-entropy error is present. Assume an error if
                 * we exceed TRNG_RETRIES reads of zero.
                 */
                if (ccp->hwrng_retries++ > TRNG_RETRIES)
                        return -EIO;

                return 0;
        }

        /* Reset the counter and save the rng value */
        ccp->hwrng_retries = 0;
        memcpy(data, &trng_value, len);

        return len;
}

static int ccp_init(struct ccp_device *ccp)
{
        struct device *dev = ccp->dev;
        struct ccp_cmd_queue *cmd_q;
        struct dma_pool *dma_pool;
        char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
        unsigned int qmr, qim, i;
        int ret;

        /* Find available queues */
        qim = 0;
        qmr = ioread32(ccp->io_regs + Q_MASK_REG);
        for (i = 0; i < MAX_HW_QUEUES; i++) {
                if (!(qmr & (1 << i)))
                        continue;

                /* Allocate a dma pool for this queue */
                snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
                         ccp->name, i);
                dma_pool = dma_pool_create(dma_pool_name, dev,
                                           CCP_DMAPOOL_MAX_SIZE,
                                           CCP_DMAPOOL_ALIGN, 0);
                if (!dma_pool) {
                        dev_err(dev, "unable to allocate dma pool\n");
                        ret = -ENOMEM;
                        goto e_pool;
                }

                cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
                ccp->cmd_q_count++;

                cmd_q->ccp = ccp;
                cmd_q->id = i;
                cmd_q->dma_pool = dma_pool;

                /* Reserve 2 KSB regions for the queue */
                cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
                cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
                ccp->ksb_count -= 2;

                /* Preset some register values and masks that are queue
                 * number dependent
                 */
                cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
                                    (CMD_Q_STATUS_INCR * i);
                cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
                                        (CMD_Q_STATUS_INCR * i);
                cmd_q->int_ok = 1 << (i * 2);
                cmd_q->int_err = 1 << ((i * 2) + 1);

                cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));

                init_waitqueue_head(&cmd_q->int_queue);

                /* Build queue interrupt mask (two interrupts per queue) */
                qim |= cmd_q->int_ok | cmd_q->int_err;

#ifdef CONFIG_ARM64
                /* For arm64 set the recommended queue cache settings */
                iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE +
                          (CMD_Q_CACHE_INC * i));
#endif

                dev_dbg(dev, "queue #%u available\n", i);
        }
        if (ccp->cmd_q_count == 0) {
                dev_notice(dev, "no command queues available\n");
                ret = -EIO;
                goto e_pool;
        }
        dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);

        /* Disable and clear interrupts until ready */
        iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
        for (i = 0; i < ccp->cmd_q_count; i++) {
                cmd_q = &ccp->cmd_q[i];

                ioread32(cmd_q->reg_int_status);
                ioread32(cmd_q->reg_status);
        }
        iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);

        /* Request an irq */
        ret = ccp->get_irq(ccp);
        if (ret) {
                dev_err(dev, "unable to allocate an IRQ\n");
                goto e_pool;
        }

        /* Initialize the queues used to wait for KSB space and suspend */
        init_waitqueue_head(&ccp->ksb_queue);
        init_waitqueue_head(&ccp->suspend_queue);

        /* Create a kthread for each queue */
        for (i = 0; i < ccp->cmd_q_count; i++) {
                struct task_struct *kthread;

                cmd_q = &ccp->cmd_q[i];

                kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
                                         "%s-q%u", ccp->name, cmd_q->id);
                if (IS_ERR(kthread)) {
                        dev_err(dev, "error creating queue thread (%ld)\n",
                                PTR_ERR(kthread));
                        ret = PTR_ERR(kthread);
                        goto e_kthread;
                }

                cmd_q->kthread = kthread;
                wake_up_process(kthread);
        }

        /* Register the RNG */
        ccp->hwrng.name = ccp->rngname;
        ccp->hwrng.read = ccp_trng_read;
        ret = hwrng_register(&ccp->hwrng);
        if (ret) {
                dev_err(dev, "error registering hwrng (%d)\n", ret);
                goto e_kthread;
        }

        /* Register the DMA engine support */
        ret = ccp_dmaengine_register(ccp);
        if (ret)
                goto e_hwrng;

        ccp_add_device(ccp);

        /* Enable interrupts */
        iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);

        return 0;

e_hwrng:
        hwrng_unregister(&ccp->hwrng);

e_kthread:
        for (i = 0; i < ccp->cmd_q_count; i++)
                if (ccp->cmd_q[i].kthread)
                        kthread_stop(ccp->cmd_q[i].kthread);

        ccp->free_irq(ccp);

e_pool:
        for (i = 0; i < ccp->cmd_q_count; i++)
                dma_pool_destroy(ccp->cmd_q[i].dma_pool);

        return ret;
}
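
/* ccp_destroy() tears down in roughly the reverse order of ccp_init():
 * the device is removed from the unit list first so no new commands can
 * be queued, then the DMA engine, hwrng and queue kthreads are stopped,
 * interrupts are disabled and released, the DMA pools are destroyed, and
 * any commands still on the cmd or backlog lists have their callbacks
 * invoked with -ENODEV.
 */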

static void ccp_destroy(struct ccp_device *ccp)
{
        struct ccp_cmd_queue *cmd_q;
        struct ccp_cmd *cmd;
        unsigned int qim, i;

        /* Remove this device from the list of available units first */
        ccp_del_device(ccp);

        /* Unregister the DMA engine */
        ccp_dmaengine_unregister(ccp);

        /* Unregister the RNG */
        hwrng_unregister(&ccp->hwrng);

        /* Stop the queue kthreads */
        for (i = 0; i < ccp->cmd_q_count; i++)
                if (ccp->cmd_q[i].kthread)
                        kthread_stop(ccp->cmd_q[i].kthread);

        /* Build queue interrupt mask (two interrupt masks per queue) */
        qim = 0;
        for (i = 0; i < ccp->cmd_q_count; i++) {
                cmd_q = &ccp->cmd_q[i];
                qim |= cmd_q->int_ok | cmd_q->int_err;
        }

        /* Disable and clear interrupts */
        iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
        for (i = 0; i < ccp->cmd_q_count; i++) {
                cmd_q = &ccp->cmd_q[i];

                ioread32(cmd_q->reg_int_status);
                ioread32(cmd_q->reg_status);
        }
        iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);

        ccp->free_irq(ccp);

        for (i = 0; i < ccp->cmd_q_count; i++)
                dma_pool_destroy(ccp->cmd_q[i].dma_pool);

        /* Flush the cmd and backlog queue */
        while (!list_empty(&ccp->cmd)) {
                /* Invoke the callback directly with an error code */
                cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
                list_del(&cmd->entry);
                cmd->callback(cmd->data, -ENODEV);
        }
        while (!list_empty(&ccp->backlog)) {
                /* Invoke the callback directly with an error code */
                cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
                list_del(&cmd->entry);
                cmd->callback(cmd->data, -ENODEV);
        }
}

static irqreturn_t ccp_irq_handler(int irq, void *data)
{
        struct device *dev = data;
        struct ccp_device *ccp = dev_get_drvdata(dev);
        struct ccp_cmd_queue *cmd_q;
        u32 q_int, status;
        unsigned int i;

        status = ioread32(ccp->io_regs + IRQ_STATUS_REG);

        for (i = 0; i < ccp->cmd_q_count; i++) {
                cmd_q = &ccp->cmd_q[i];

                q_int = status & (cmd_q->int_ok | cmd_q->int_err);
                if (q_int) {
                        cmd_q->int_status = status;
                        cmd_q->q_status = ioread32(cmd_q->reg_status);
                        cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);

                        /* On error, only save the first error value */
                        if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
                                cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);

                        cmd_q->int_rcvd = 1;

                        /* Acknowledge the interrupt and wake the kthread */
                        iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
                        wake_up_interruptible(&cmd_q->int_queue);
                }
        }

        return IRQ_HANDLED;
}
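
/* ccp3_actions collects the version 3 entry points; ccpv3 tags them with
 * CCP_VERSION(3, 0) so the rest of the driver can reach them through the
 * ccp_vdata/ccp_actions indirection rather than calling them directly.
 */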

static const struct ccp_actions ccp3_actions = {
        .perform_aes = ccp_perform_aes,
        .perform_xts_aes = ccp_perform_xts_aes,
        .perform_sha = ccp_perform_sha,
        .perform_rsa = ccp_perform_rsa,
        .perform_passthru = ccp_perform_passthru,
        .perform_ecc = ccp_perform_ecc,
        .init = ccp_init,
        .destroy = ccp_destroy,
        .irqhandler = ccp_irq_handler,
};

struct ccp_vdata ccpv3 = {
        .version = CCP_VERSION(3, 0),
        .perform = &ccp3_actions,
};