/*
 * AMD Cryptographic Coprocessor (CCP) driver
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Gary R Hook <gary.hook@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/compiler.h>
#include <linux/ccp.h>

#include "ccp-dev.h"

/* Allocate the requested number of contiguous LSB slots
 * from the LSB bitmap. Look in the private range for this
 * queue first; failing that, check the public area.
 * If no space is available, wait around.
 * Return: first slot number
 */
static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
{
	struct ccp_device *ccp;
	int start;

	/* First look at the map for the queue */
	if (cmd_q->lsb >= 0) {
		start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
							LSB_SIZE,
							0, count, 0);
		if (start < LSB_SIZE) {
			bitmap_set(cmd_q->lsbmap, start, count);
			return start + cmd_q->lsb * LSB_SIZE;
		}
	}

	/* No joy; try to get an entry from the shared blocks */
	ccp = cmd_q->ccp;
	for (;;) {
		mutex_lock(&ccp->sb_mutex);

		start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
							MAX_LSB_CNT * LSB_SIZE,
							0,
							count, 0);
		if (start <= MAX_LSB_CNT * LSB_SIZE) {
			bitmap_set(ccp->lsbmap, start, count);

			mutex_unlock(&ccp->sb_mutex);
			return start;
		}

		ccp->sb_avail = 0;

		mutex_unlock(&ccp->sb_mutex);

		/* Wait for KSB entries to become available */
		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
			return 0;
	}
}

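/* Note: a return value of 0 from ccp_lsb_alloc() means the wait for a
 * shared slot was interrupted.  Slot 0 itself should never be handed out,
 * since LSB region 0 is kept out of both the private and public maps (see
 * ccp_find_lsb_regions() and ccp_assign_lsbs() below); that is what lets
 * ccp_lsb_free() treat a starting slot of 0 as "nothing to free".
 */
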
/* Free a number of LSB slots from the bitmap, starting at
 * the indicated starting slot number.
 */
static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
			 unsigned int count)
{
	if (!start)
		return;

	if (cmd_q->lsb == start) {
		/* An entry from the private LSB */
		bitmap_clear(cmd_q->lsbmap, start, count);
	} else {
		/* From the shared LSBs */
		struct ccp_device *ccp = cmd_q->ccp;

		mutex_lock(&ccp->sb_mutex);
		bitmap_clear(ccp->lsbmap, start, count);
		ccp->sb_avail = 1;
		mutex_unlock(&ccp->sb_mutex);
		wake_up_interruptible_all(&ccp->sb_queue);
	}
}

/* CCP version 5: Union to define the function field (cmd_reg1/dword0) */
union ccp_function {
	struct {
		u16 size:7;
		u16 encrypt:1;
		u16 mode:5;
		u16 type:2;
	} aes;
	struct {
		u16 size:7;
		u16 encrypt:1;
		u16 rsvd:5;
		u16 type:2;
	} aes_xts;
	struct {
		u16 rsvd1:10;
		u16 type:4;
		u16 rsvd2:1;
	} sha;
	struct {
		u16 mode:3;
		u16 size:12;
	} rsa;
	struct {
		u16 byteswap:2;
		u16 bitwise:3;
		u16 reflect:2;
		u16 rsvd:8;
	} pt;
	struct {
		u16 rsvd:13;
	} zlib;
	struct {
		u16 size:10;
		u16 type:2;
		u16 mode:3;
	} ecc;
	u16 raw;
};

#define	CCP_AES_SIZE(p)		((p)->aes.size)
#define	CCP_AES_ENCRYPT(p)	((p)->aes.encrypt)
#define	CCP_AES_MODE(p)		((p)->aes.mode)
#define	CCP_AES_TYPE(p)		((p)->aes.type)
#define	CCP_XTS_SIZE(p)		((p)->aes_xts.size)
#define	CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)
#define	CCP_SHA_TYPE(p)		((p)->sha.type)
#define	CCP_RSA_SIZE(p)		((p)->rsa.size)
#define	CCP_PT_BYTESWAP(p)	((p)->pt.byteswap)
#define	CCP_PT_BITWISE(p)	((p)->pt.bitwise)
#define	CCP_ECC_MODE(p)		((p)->ecc.mode)
#define	CCP_ECC_AFFINE(p)	((p)->ecc.one)

/* Word 0 */
#define CCP5_CMD_DW0(p)		((p)->dw0)
#define CCP5_CMD_SOC(p)		(CCP5_CMD_DW0(p).soc)
#define CCP5_CMD_IOC(p)		(CCP5_CMD_DW0(p).ioc)
#define CCP5_CMD_INIT(p)	(CCP5_CMD_DW0(p).init)
#define CCP5_CMD_EOM(p)		(CCP5_CMD_DW0(p).eom)
#define CCP5_CMD_FUNCTION(p)	(CCP5_CMD_DW0(p).function)
#define CCP5_CMD_ENGINE(p)	(CCP5_CMD_DW0(p).engine)
#define CCP5_CMD_PROT(p)	(CCP5_CMD_DW0(p).prot)

/* Word 1 */
#define CCP5_CMD_DW1(p)		((p)->length)
#define CCP5_CMD_LEN(p)		(CCP5_CMD_DW1(p))

/* Word 2 */
#define CCP5_CMD_DW2(p)		((p)->src_lo)
#define CCP5_CMD_SRC_LO(p)	(CCP5_CMD_DW2(p))

/* Word 3 */
#define CCP5_CMD_DW3(p)		((p)->dw3)
#define CCP5_CMD_SRC_MEM(p)	((p)->dw3.src_mem)
#define CCP5_CMD_SRC_HI(p)	((p)->dw3.src_hi)
#define CCP5_CMD_LSB_ID(p)	((p)->dw3.lsb_cxt_id)
#define CCP5_CMD_FIX_SRC(p)	((p)->dw3.fixed)

/* Words 4/5 */
#define CCP5_CMD_DW4(p)		((p)->dw4)
#define CCP5_CMD_DST_LO(p)	(CCP5_CMD_DW4(p).dst_lo)
#define CCP5_CMD_DW5(p)		((p)->dw5.fields.dst_hi)
#define CCP5_CMD_DST_HI(p)	(CCP5_CMD_DW5(p))
#define CCP5_CMD_DST_MEM(p)	((p)->dw5.fields.dst_mem)
#define CCP5_CMD_FIX_DST(p)	((p)->dw5.fields.fixed)
#define CCP5_CMD_SHA_LO(p)	((p)->dw4.sha_len_lo)
#define CCP5_CMD_SHA_HI(p)	((p)->dw5.sha_len_hi)

/* Word 6/7 */
#define CCP5_CMD_DW6(p)		((p)->key_lo)
#define CCP5_CMD_KEY_LO(p)	(CCP5_CMD_DW6(p))
#define CCP5_CMD_DW7(p)		((p)->dw7)
#define CCP5_CMD_KEY_HI(p)	((p)->dw7.key_hi)
#define CCP5_CMD_KEY_MEM(p)	((p)->dw7.key_mem)

static inline u32 low_address(unsigned long addr)
{
	return (u64)addr & 0x0ffffffff;
}

static inline u32 high_address(unsigned long addr)
{
	return ((u64)addr >> 32) & 0x00000ffff;
}

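/* Illustration (made-up values): if COMMANDS_PER_QUEUE were 8, the hardware
 * head index 2 and the driver tail index (qidx) 5, then
 * n = 2 + 8 - 5 - 1 = 4 descriptors could still be queued.  One slot is
 * always left unused so that a full ring can be told apart from an empty
 * one.
 */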
static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
{
	unsigned int head_idx, n;
	u32 head_lo, queue_start;

	queue_start = low_address(cmd_q->qdma_tail);
	head_lo = ioread32(cmd_q->reg_head_lo);
	head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc);

	n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;

	return n % COMMANDS_PER_QUEUE; /* Always one unused spot */
}

static int ccp5_do_cmd(struct ccp5_desc *desc,
		       struct ccp_cmd_queue *cmd_q)
{
	u32 *mP;
	__le32 *dP;
	u32 tail;
	int i;
	int ret = 0;

	if (CCP5_CMD_SOC(desc)) {
		CCP5_CMD_IOC(desc) = 1;
		CCP5_CMD_SOC(desc) = 0;
	}
	mutex_lock(&cmd_q->q_mutex);

	mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
	dP = (__le32 *) desc;
	for (i = 0; i < 8; i++)
		mP[i] = cpu_to_le32(dP[i]); /* handle endianness */

	cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;

	/* The data used by this command must be flushed to memory */
	wmb();

	/* Write the new tail address back to the queue register */
	tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
	iowrite32(tail, cmd_q->reg_tail_lo);

	/* Turn the queue back on using our cached control register */
	iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
	mutex_unlock(&cmd_q->q_mutex);

	if (CCP5_CMD_IOC(desc)) {
		/* Wait for the job to complete */
		ret = wait_event_interruptible(cmd_q->int_queue,
					       cmd_q->int_rcvd);
		if (ret || cmd_q->cmd_error) {
			if (cmd_q->cmd_error)
				ccp_log_error(cmd_q->ccp,
					      cmd_q->cmd_error);
			/* A version 5 device doesn't use Job IDs... */
			if (!ret)
				ret = -EIO;
		}
		cmd_q->int_rcvd = 0;
	}

	return ret;
}

static int ccp5_perform_aes(struct ccp_op *op)
{
	struct ccp5_desc desc;
	union ccp_function function;
	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;

	/* Zero out all the fields of the command desc */
	memset(&desc, 0, Q_DESC_SIZE);

	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;

	CCP5_CMD_SOC(&desc) = op->soc;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = op->init;
	CCP5_CMD_EOM(&desc) = op->eom;
	CCP5_CMD_PROT(&desc) = 0;

	function.raw = 0;
	CCP_AES_ENCRYPT(&function) = op->u.aes.action;
	CCP_AES_MODE(&function) = op->u.aes.mode;
	CCP_AES_TYPE(&function) = op->u.aes.type;
	if (op->u.aes.mode == CCP_AES_MODE_CFB)
		CCP_AES_SIZE(&function) = 0x7f;

	CCP5_CMD_FUNCTION(&desc) = function.raw;

	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;

	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
	CCP5_CMD_KEY_HI(&desc) = 0;
	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;

	return ccp5_do_cmd(&desc, op->cmd_q);
}

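/* The remaining ccp5_perform_*() helpers follow the same shape as
 * ccp5_perform_aes() above: build a struct ccp5_desc on the stack, encode
 * the engine-specific bits through union ccp_function, point the source,
 * destination and (where used) key fields at system memory or an LSB slot,
 * and hand the descriptor to ccp5_do_cmd(), which blocks until the hardware
 * signals completion (IOC is always set).
 */
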
static int ccp5_perform_xts_aes(struct ccp_op *op)
{
	struct ccp5_desc desc;
	union ccp_function function;
	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;

	/* Zero out all the fields of the command desc */
	memset(&desc, 0, Q_DESC_SIZE);

	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;

	CCP5_CMD_SOC(&desc) = op->soc;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = op->init;
	CCP5_CMD_EOM(&desc) = op->eom;
	CCP5_CMD_PROT(&desc) = 0;

	function.raw = 0;
	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;

	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
	CCP5_CMD_KEY_HI(&desc) = 0;
	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;

	return ccp5_do_cmd(&desc, op->cmd_q);
}

static int ccp5_perform_sha(struct ccp_op *op)
{
	struct ccp5_desc desc;
	union ccp_function function;

	/* Zero out all the fields of the command desc */
	memset(&desc, 0, Q_DESC_SIZE);

	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;

	CCP5_CMD_SOC(&desc) = op->soc;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = 1;
	CCP5_CMD_EOM(&desc) = op->eom;
	CCP5_CMD_PROT(&desc) = 0;

	function.raw = 0;
	CCP_SHA_TYPE(&function) = op->u.sha.type;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;

	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;

	if (op->eom) {
		CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
		CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
	} else {
		CCP5_CMD_SHA_LO(&desc) = 0;
		CCP5_CMD_SHA_HI(&desc) = 0;
	}

	return ccp5_do_cmd(&desc, op->cmd_q);
}

static int ccp5_perform_rsa(struct ccp_op *op)
{
	struct ccp5_desc desc;
	union ccp_function function;

	/* Zero out all the fields of the command desc */
	memset(&desc, 0, Q_DESC_SIZE);

	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;

	CCP5_CMD_SOC(&desc) = op->soc;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = 0;
	CCP5_CMD_EOM(&desc) = 1;
	CCP5_CMD_PROT(&desc) = 0;

	function.raw = 0;
	CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;

	/* Source is from external memory */
	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

	/* Destination is in external memory */
	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

	/* Exponent is in LSB memory */
	CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE;
	CCP5_CMD_KEY_HI(&desc) = 0;
	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;

	return ccp5_do_cmd(&desc, op->cmd_q);
}

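/* Pass-through operations can copy data between system memory and the LSB
 * in either direction; whenever one side is an LSB entry, the address
 * programmed into the descriptor is the slot index scaled by CCP_SB_BYTES.
 */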
static int ccp5_perform_passthru(struct ccp_op *op)
{
	struct ccp5_desc desc;
	union ccp_function function;
	struct ccp_dma_info *saddr = &op->src.u.dma;
	struct ccp_dma_info *daddr = &op->dst.u.dma;

	memset(&desc, 0, Q_DESC_SIZE);

	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;

	CCP5_CMD_SOC(&desc) = 0;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = 0;
	CCP5_CMD_EOM(&desc) = op->eom;
	CCP5_CMD_PROT(&desc) = 0;

	function.raw = 0;
	CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
	CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	/* Take the length from whichever side is in system memory;
	 * an LSB entry has no DMA length of its own.
	 */
	if (op->src.type == CCP_MEMTYPE_SYSTEM)
		CCP5_CMD_LEN(&desc) = saddr->length;
	else
		CCP5_CMD_LEN(&desc) = daddr->length;

	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
		CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
		CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
			CCP5_CMD_LSB_ID(&desc) = op->sb_key;
	} else {
		u32 key_addr = op->src.u.sb * CCP_SB_BYTES;

		CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
		CCP5_CMD_SRC_HI(&desc) = 0;
		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
	}

	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
		CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
		CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
	} else {
		u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;

		CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
		CCP5_CMD_DST_HI(&desc) = 0;
		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
	}

	return ccp5_do_cmd(&desc, op->cmd_q);
}

static int ccp5_perform_ecc(struct ccp_op *op)
{
	struct ccp5_desc desc;
	union ccp_function function;

	/* Zero out all the fields of the command desc */
	memset(&desc, 0, Q_DESC_SIZE);

	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;

	CCP5_CMD_SOC(&desc) = 0;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = 0;
	CCP5_CMD_EOM(&desc) = 1;
	CCP5_CMD_PROT(&desc) = 0;

	function.raw = 0;
	function.ecc.mode = op->u.ecc.function;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;

	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

	return ccp5_do_cmd(&desc, op->cmd_q);
}

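/* The 64-bit LSB status word built in ccp5_init() is consumed as one
 * LSB_REGION_WIDTH-bit field per LSB region; within each field, bit
 * (1 << queue id) indicates that the queue may use that region.
 * ccp_find_lsb_regions() walks those fields to build the per-queue mask
 * of reachable regions.
 */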
static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
{
	int q_mask = 1 << cmd_q->id;
	int queues = 0;
	int j;

	/* Build a bit mask to know which LSBs this queue has access to.
	 * Don't bother with segment 0 as it has special privileges.
	 */
	for (j = 1; j < MAX_LSB_CNT; j++) {
		if (status & q_mask)
			bitmap_set(cmd_q->lsbmask, j, 1);
		status >>= LSB_REGION_WIDTH;
	}
	queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
	dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
		 cmd_q->id, queues);

	return queues ? 0 : -EINVAL;
}

static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
					int lsb_cnt, int n_lsbs,
					unsigned long *lsb_pub)
{
	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
	int bitno;
	int qlsb_wgt;
	int i;

	/* For each queue:
	 * If the count of potential LSBs available to a queue matches the
	 * ordinal given to us in lsb_cnt:
	 * Copy the mask of possible LSBs for this queue into "qlsb";
	 * For each bit in qlsb, see if the corresponding bit in the
	 * aggregation mask is set; if so, we have a match.
	 * If we have a match, clear the bit in the aggregation to
	 * mark it as no longer available.
	 * If there is no match, clear the bit in qlsb and keep looking.
	 */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];

		qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);

		if (qlsb_wgt == lsb_cnt) {
			bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);

			bitno = find_first_bit(qlsb, MAX_LSB_CNT);
			while (bitno < MAX_LSB_CNT) {
				if (test_bit(bitno, lsb_pub)) {
					/* We found an available LSB
					 * that this queue can access
					 */
					cmd_q->lsb = bitno;
					bitmap_clear(lsb_pub, bitno, 1);
					dev_info(ccp->dev,
						 "Queue %d gets LSB %d\n",
						 i, bitno);
					break;
				}
				bitmap_clear(qlsb, bitno, 1);
				bitno = find_first_bit(qlsb, MAX_LSB_CNT);
			}
			if (bitno >= MAX_LSB_CNT)
				return -EINVAL;
			n_lsbs--;
		}
	}
	return n_lsbs;
}

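/* Illustration of the passes driven by ccp_assign_lsbs() below (made-up
 * values): if queue 0 can only reach LSB 1 while queue 1 can reach LSBs 1
 * and 2, the lsb_cnt == 1 pass hands LSB 1 to queue 0, and the
 * lsb_cnt == 2 pass then hands LSB 2 to queue 1.
 */
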
/* For each queue, from the most- to least-constrained:
 * find an LSB that can be assigned to the queue. If there are N queues that
 * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
 * dedicated LSB. Remaining LSB regions become a shared resource.
 * If we have fewer LSBs than queues, all LSB regions become shared resources.
 */
static int ccp_assign_lsbs(struct ccp_device *ccp)
{
	DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
	int n_lsbs = 0;
	int bitno;
	int i, lsb_cnt;
	int rc = 0;

	bitmap_zero(lsb_pub, MAX_LSB_CNT);

	/* Create an aggregate bitmap to get a total count of available LSBs */
	for (i = 0; i < ccp->cmd_q_count; i++)
		bitmap_or(lsb_pub,
			  lsb_pub, ccp->cmd_q[i].lsbmask,
			  MAX_LSB_CNT);

	n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);

	if (n_lsbs >= ccp->cmd_q_count) {
		/* We have enough LSBS to give every queue a private LSB.
		 * Brute force search to start with the queues that are more
		 * constrained in LSB choice. When an LSB is privately
		 * assigned, it is removed from the public mask.
		 * This is an ugly N squared algorithm with some optimization.
		 */
		for (lsb_cnt = 1;
		     n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
		     lsb_cnt++) {
			rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
							  lsb_pub);
			if (rc < 0)
				return -EINVAL;
			n_lsbs = rc;
		}
	}

	rc = 0;
	/* What's left of the LSBs, according to the public mask, now become
	 * shared. Any zero bits in the lsb_pub mask represent an LSB region
	 * that can't be used as a shared resource, so mark the LSB slots for
	 * them as "in use".
	 */
	bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);

	bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
	while (bitno < MAX_LSB_CNT) {
		bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
		bitmap_set(qlsb, bitno, 1);
		bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
	}

	return rc;
}

static int ccp5_init(struct ccp_device *ccp)
{
	struct device *dev = ccp->dev;
	struct ccp_cmd_queue *cmd_q;
	struct dma_pool *dma_pool;
	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
	unsigned int qmr, qim, i;
	u64 status;
	u32 status_lo, status_hi;
	int ret;

	/* Find available queues */
	qim = 0;
	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
	for (i = 0; i < MAX_HW_QUEUES; i++) {

		if (!(qmr & (1 << i)))
			continue;

		/* Allocate a dma pool for this queue */
		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
			 ccp->name, i);
		dma_pool = dma_pool_create(dma_pool_name, dev,
					   CCP_DMAPOOL_MAX_SIZE,
					   CCP_DMAPOOL_ALIGN, 0);
		if (!dma_pool) {
			dev_err(dev, "unable to allocate dma pool\n");
			ret = -ENOMEM;
			goto e_pool;
		}

		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
		ccp->cmd_q_count++;

		cmd_q->ccp = ccp;
		cmd_q->id = i;
		cmd_q->dma_pool = dma_pool;
		mutex_init(&cmd_q->q_mutex);

		/* Page alignment satisfies our needs for N <= 128 */
		BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
		cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
						   &cmd_q->qbase_dma,
						   GFP_KERNEL);
		if (!cmd_q->qbase) {
			dev_err(dev, "unable to allocate command queue\n");
			ret = -ENOMEM;
			goto e_pool;
		}

		cmd_q->qidx = 0;
		/* Preset some register values and masks that are queue
		 * number dependent
		 */
		cmd_q->reg_control = ccp->io_regs +
				     CMD5_Q_STATUS_INCR * (i + 1);
		cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
		cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
		cmd_q->reg_int_enable = cmd_q->reg_control +
					CMD5_Q_INT_ENABLE_BASE;
		cmd_q->reg_interrupt_status = cmd_q->reg_control +
					      CMD5_Q_INTERRUPT_STATUS_BASE;
		cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
		cmd_q->reg_int_status = cmd_q->reg_control +
					CMD5_Q_INT_STATUS_BASE;
		cmd_q->reg_dma_status = cmd_q->reg_control +
					CMD5_Q_DMA_STATUS_BASE;
		cmd_q->reg_dma_read_status = cmd_q->reg_control +
					     CMD5_Q_DMA_READ_STATUS_BASE;
		cmd_q->reg_dma_write_status = cmd_q->reg_control +
					      CMD5_Q_DMA_WRITE_STATUS_BASE;

		init_waitqueue_head(&cmd_q->int_queue);

		dev_dbg(dev, "queue #%u available\n", i);
	}
	if (ccp->cmd_q_count == 0) {
		dev_notice(dev, "no command queues available\n");
		ret = -EIO;
		goto e_pool;
	}
	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);

	/* Turn off the queues and disable interrupts until ready */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		cmd_q->qcontrol = 0; /* Start with nothing */
		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);

		/* Disable the interrupts */
		iowrite32(0x00, cmd_q->reg_int_enable);
		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);

		/* Clear the interrupts */
		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
	}

	dev_dbg(dev, "Requesting an IRQ...\n");
	/* Request an irq */
	ret = ccp->get_irq(ccp);
	if (ret) {
		dev_err(dev, "unable to allocate an IRQ\n");
		goto e_pool;
	}

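	/* The queue-to-LSB access map is read from the private mask
	 * registers, mirrored to the public mask registers, and combined
	 * into a single 64-bit value that ccp_find_lsb_regions() and
	 * ccp_assign_lsbs() use to divide the LSB regions among the queues.
	 */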
	dev_dbg(dev, "Loading LSB map...\n");
	/* Copy the private LSB mask to the public registers */
	status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
	status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
	iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
	iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
	status = ((u64)status_hi << 30) | (u64)status_lo;

	dev_dbg(dev, "Configuring virtual queues...\n");
	/* Configure size of each virtual queue accessible to host */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		u32 dma_addr_lo;
		u32 dma_addr_hi;

		cmd_q = &ccp->cmd_q[i];

		cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;

		cmd_q->qdma_tail = cmd_q->qbase_dma;
		dma_addr_lo = low_address(cmd_q->qdma_tail);
		iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
		iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);

		dma_addr_hi = high_address(cmd_q->qdma_tail);
		cmd_q->qcontrol |= (dma_addr_hi << 16);
		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);

		/* Find the LSB regions accessible to the queue */
		ccp_find_lsb_regions(cmd_q, status);
		cmd_q->lsb = -1; /* Unassigned value */
	}

	dev_dbg(dev, "Assigning LSBs...\n");
	ret = ccp_assign_lsbs(ccp);
	if (ret) {
		dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
		goto e_irq;
	}

	/* Optimization: pre-allocate LSB slots for each queue */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
		ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
	}

	dev_dbg(dev, "Starting threads...\n");
	/* Create a kthread for each queue */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct task_struct *kthread;

		cmd_q = &ccp->cmd_q[i];

		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
					 "%s-q%u", ccp->name, cmd_q->id);
		if (IS_ERR(kthread)) {
			dev_err(dev, "error creating queue thread (%ld)\n",
				PTR_ERR(kthread));
			ret = PTR_ERR(kthread);
			goto e_kthread;
		}

		cmd_q->kthread = kthread;
		wake_up_process(kthread);
	}

	dev_dbg(dev, "Enabling interrupts...\n");
	/* Enable interrupts */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];
		iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
	}

	dev_dbg(dev, "Registering device...\n");
	/* Put this on the unit list to make it available */
	ccp_add_device(ccp);

	ret = ccp_register_rng(ccp);
	if (ret)
		goto e_kthread;

	/* Register the DMA engine support */
	ret = ccp_dmaengine_register(ccp);
	if (ret)
		goto e_hwrng;

	return 0;

e_hwrng:
	ccp_unregister_rng(ccp);

e_kthread:
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

e_irq:
	ccp->free_irq(ccp);

e_pool:
	for (i = 0; i < ccp->cmd_q_count; i++)
		dma_pool_destroy(ccp->cmd_q[i].dma_pool);

	return ret;
}

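/* Tear down in roughly the reverse order of ccp5_init(): unregister the DMA
 * engine and RNG, remove the device from the unit list, quiesce the queues,
 * stop the per-queue kthreads, free the IRQ and the descriptor rings, and
 * finally complete any queued or backlogged commands with -ENODEV.
 */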
static void ccp5_destroy(struct ccp_device *ccp)
{
	struct device *dev = ccp->dev;
	struct ccp_cmd_queue *cmd_q;
	struct ccp_cmd *cmd;
	unsigned int i;

	/* Unregister the DMA engine */
	ccp_dmaengine_unregister(ccp);

	/* Unregister the RNG */
	ccp_unregister_rng(ccp);

	/* Remove this device from the list of available units first */
	ccp_del_device(ccp);

	/* Disable and clear interrupts */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		/* Turn off the run bit */
		iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);

		/* Clear the interrupt status */
		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);

		/* Disable the interrupts */
		iowrite32(0x00, cmd_q->reg_int_enable);
		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);
	}

	/* Stop the queue kthreads */
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

	ccp->free_irq(ccp);

	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];
		dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
				  cmd_q->qbase_dma);
	}

	/* Flush the cmd and backlog queue */
	while (!list_empty(&ccp->cmd)) {
		/* Invoke the callback directly with an error code */
		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
		list_del(&cmd->entry);
		cmd->callback(cmd->data, -ENODEV);
	}
	while (!list_empty(&ccp->backlog)) {
		/* Invoke the callback directly with an error code */
		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
		list_del(&cmd->entry);
		cmd->callback(cmd->data, -ENODEV);
	}
}

static irqreturn_t ccp5_irq_handler(int irq, void *data)
{
	struct device *dev = data;
	struct ccp_device *ccp = dev_get_drvdata(dev);
	u32 status;
	unsigned int i;

	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];

		status = ioread32(cmd_q->reg_interrupt_status);

		if (status) {
			cmd_q->int_status = status;
			cmd_q->q_status = ioread32(cmd_q->reg_status);
			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);

			/* On error, only save the first error value */
			if ((status & INT_ERROR) && !cmd_q->cmd_error)
				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);

			cmd_q->int_rcvd = 1;

			/* Acknowledge the interrupt and wake the kthread */
			iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
			wake_up_interruptible(&cmd_q->int_queue);
		}
	}

	return IRQ_HANDLED;
}

static void ccp5_config(struct ccp_device *ccp)
{
	/* Public side */
	iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
}

static void ccp5other_config(struct ccp_device *ccp)
{
	int i;
	u32 rnd;

	/* We own all of the queues on the NTB CCP */

	iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
	iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
	for (i = 0; i < 12; i++) {
		rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
		iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
	}

	iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
	iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
	iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);

	iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
	iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);

	iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);

	ccp5_config(ccp);
}

/* Version 5 adds some function, but is essentially the same as v3 */
static const struct ccp_actions ccp5_actions = {
	.aes = ccp5_perform_aes,
	.xts_aes = ccp5_perform_xts_aes,
	.sha = ccp5_perform_sha,
	.rsa = ccp5_perform_rsa,
	.passthru = ccp5_perform_passthru,
	.ecc = ccp5_perform_ecc,
	.sballoc = ccp_lsb_alloc,
	.sbfree = ccp_lsb_free,
	.init = ccp5_init,
	.destroy = ccp5_destroy,
	.get_free_slots = ccp5_get_free_slots,
	.irqhandler = ccp5_irq_handler,
};

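/* The two vdata instances below differ only in their setup routine:
 * ccpv5a uses the common ccp5_config(), while ccpv5b (the "other"/NTB
 * instance) first programs the TRNG, queue mask/priority and private LSB
 * mask registers in ccp5other_config().
 */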
const struct ccp_vdata ccpv5a = {
	.version = CCP_VERSION(5, 0),
	.setup = ccp5_config,
	.perform = &ccp5_actions,
	.bar = 2,
	.offset = 0x0,
};

const struct ccp_vdata ccpv5b = {
	.version = CCP_VERSION(5, 0),
	.setup = ccp5other_config,
	.perform = &ccp5_actions,
	.bar = 2,
	.offset = 0x0,
};