1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Driver for IBM PowerNV compression accelerator 4 * 5 * Copyright (C) 2015 Dan Streetman, IBM Corp 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include "nx-842.h" 11 12 #include <crypto/internal/scompress.h> 13 #include <linux/timer.h> 14 15 #include <asm/prom.h> 16 #include <asm/icswx.h> 17 #include <asm/vas.h> 18 #include <asm/reg.h> 19 #include <asm/opal-api.h> 20 #include <asm/opal.h> 21 22 MODULE_LICENSE("GPL"); 23 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); 24 MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors"); 25 MODULE_ALIAS_CRYPTO("842"); 26 MODULE_ALIAS_CRYPTO("842-nx"); 27 28 #define WORKMEM_ALIGN (CRB_ALIGN) 29 #define CSB_WAIT_MAX (5000) /* ms */ 30 #define VAS_RETRIES (10) 31 32 struct nx842_workmem { 33 /* Below fields must be properly aligned */ 34 struct coprocessor_request_block crb; /* CRB_ALIGN align */ 35 struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */ 36 struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */ 37 /* Above fields must be properly aligned */ 38 39 ktime_t start; 40 41 char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */ 42 } __packed __aligned(WORKMEM_ALIGN); 43 44 struct nx_coproc { 45 unsigned int chip_id; 46 unsigned int ct; /* Can be 842 or GZIP high/normal*/ 47 unsigned int ci; /* Coprocessor instance, used with icswx */ 48 struct { 49 struct vas_window *rxwin; 50 int id; 51 } vas; 52 struct list_head list; 53 }; 54 55 /* 56 * Send the request to NX engine on the chip for the corresponding CPU 57 * where the process is executing. Use with VAS function. 58 */ 59 static DEFINE_PER_CPU(struct vas_window *, cpu_txwin); 60 61 /* no cpu hotplug on powernv, so this list never changes after init */ 62 static LIST_HEAD(nx_coprocs); 63 static unsigned int nx842_ct; /* used in icswx function */ 64 65 /* 66 * Using same values as in skiboot or coprocessor type representing 67 * in NX workbook. 68 */ 69 #define NX_CT_GZIP (2) /* on P9 and later */ 70 #define NX_CT_842 (3) 71 72 static int (*nx842_powernv_exec)(const unsigned char *in, 73 unsigned int inlen, unsigned char *out, 74 unsigned int *outlenp, void *workmem, int fc); 75 76 /* 77 * setup_indirect_dde - Setup an indirect DDE 78 * 79 * The DDE is setup with the DDE count, byte count, and address of 80 * first direct DDE in the list. 81 */ 82 static void setup_indirect_dde(struct data_descriptor_entry *dde, 83 struct data_descriptor_entry *ddl, 84 unsigned int dde_count, unsigned int byte_count) 85 { 86 dde->flags = 0; 87 dde->count = dde_count; 88 dde->index = 0; 89 dde->length = cpu_to_be32(byte_count); 90 dde->address = cpu_to_be64(nx842_get_pa(ddl)); 91 } 92 93 /* 94 * setup_direct_dde - Setup single DDE from buffer 95 * 96 * The DDE is setup with the buffer and length. The buffer must be properly 97 * aligned. The used length is returned. 98 * Returns: 99 * N Successfully set up DDE with N bytes 100 */ 101 static unsigned int setup_direct_dde(struct data_descriptor_entry *dde, 102 unsigned long pa, unsigned int len) 103 { 104 unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa)); 105 106 dde->flags = 0; 107 dde->count = 0; 108 dde->index = 0; 109 dde->length = cpu_to_be32(l); 110 dde->address = cpu_to_be64(pa); 111 112 return l; 113 } 114 115 /* 116 * setup_ddl - Setup DDL from buffer 117 * 118 * Returns: 119 * 0 Successfully set up DDL 120 */ 121 static int setup_ddl(struct data_descriptor_entry *dde, 122 struct data_descriptor_entry *ddl, 123 unsigned char *buf, unsigned int len, 124 bool in) 125 { 126 unsigned long pa = nx842_get_pa(buf); 127 int i, ret, total_len = len; 128 129 if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) { 130 pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n", 131 in ? "input" : "output", pa, DDE_BUFFER_ALIGN); 132 return -EINVAL; 133 } 134 135 /* only need to check last mult; since buffer must be 136 * DDE_BUFFER_ALIGN aligned, and that is a multiple of 137 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers 138 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT. 139 */ 140 if (len % DDE_BUFFER_LAST_MULT) { 141 pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n", 142 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT); 143 if (in) 144 return -EINVAL; 145 len = round_down(len, DDE_BUFFER_LAST_MULT); 146 } 147 148 /* use a single direct DDE */ 149 if (len <= LEN_ON_PAGE(pa)) { 150 ret = setup_direct_dde(dde, pa, len); 151 WARN_ON(ret < len); 152 return 0; 153 } 154 155 /* use the DDL */ 156 for (i = 0; i < DDL_LEN_MAX && len > 0; i++) { 157 ret = setup_direct_dde(&ddl[i], pa, len); 158 buf += ret; 159 len -= ret; 160 pa = nx842_get_pa(buf); 161 } 162 163 if (len > 0) { 164 pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n", 165 total_len, in ? "input" : "output", len); 166 if (in) 167 return -EMSGSIZE; 168 total_len -= len; 169 } 170 setup_indirect_dde(dde, ddl, i, total_len); 171 172 return 0; 173 } 174 175 #define CSB_ERR(csb, msg, ...) \ 176 pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n", \ 177 ##__VA_ARGS__, (csb)->flags, \ 178 (csb)->cs, (csb)->cc, (csb)->ce, \ 179 be32_to_cpu((csb)->count)) 180 181 #define CSB_ERR_ADDR(csb, msg, ...) \ 182 CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__, \ 183 (unsigned long)be64_to_cpu((csb)->address)) 184 185 static int wait_for_csb(struct nx842_workmem *wmem, 186 struct coprocessor_status_block *csb) 187 { 188 ktime_t start = wmem->start, now = ktime_get(); 189 ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX); 190 191 while (!(READ_ONCE(csb->flags) & CSB_V)) { 192 cpu_relax(); 193 now = ktime_get(); 194 if (ktime_after(now, timeout)) 195 break; 196 } 197 198 /* hw has updated csb and output buffer */ 199 barrier(); 200 201 /* check CSB flags */ 202 if (!(csb->flags & CSB_V)) { 203 CSB_ERR(csb, "CSB still not valid after %ld us, giving up", 204 (long)ktime_us_delta(now, start)); 205 return -ETIMEDOUT; 206 } 207 if (csb->flags & CSB_F) { 208 CSB_ERR(csb, "Invalid CSB format"); 209 return -EPROTO; 210 } 211 if (csb->flags & CSB_CH) { 212 CSB_ERR(csb, "Invalid CSB chaining state"); 213 return -EPROTO; 214 } 215 216 /* verify CSB completion sequence is 0 */ 217 if (csb->cs) { 218 CSB_ERR(csb, "Invalid CSB completion sequence"); 219 return -EPROTO; 220 } 221 222 /* check CSB Completion Code */ 223 switch (csb->cc) { 224 /* no error */ 225 case CSB_CC_SUCCESS: 226 break; 227 case CSB_CC_TPBC_GT_SPBC: 228 /* not an error, but the compressed data is 229 * larger than the uncompressed data :( 230 */ 231 break; 232 233 /* input data errors */ 234 case CSB_CC_OPERAND_OVERLAP: 235 /* input and output buffers overlap */ 236 CSB_ERR(csb, "Operand Overlap error"); 237 return -EINVAL; 238 case CSB_CC_INVALID_OPERAND: 239 CSB_ERR(csb, "Invalid operand"); 240 return -EINVAL; 241 case CSB_CC_NOSPC: 242 /* output buffer too small */ 243 return -ENOSPC; 244 case CSB_CC_ABORT: 245 CSB_ERR(csb, "Function aborted"); 246 return -EINTR; 247 case CSB_CC_CRC_MISMATCH: 248 CSB_ERR(csb, "CRC mismatch"); 249 return -EINVAL; 250 case CSB_CC_TEMPL_INVALID: 251 CSB_ERR(csb, "Compressed data template invalid"); 252 return -EINVAL; 253 case CSB_CC_TEMPL_OVERFLOW: 254 CSB_ERR(csb, "Compressed data template shows data past end"); 255 return -EINVAL; 256 case CSB_CC_EXCEED_BYTE_COUNT: /* P9 or later */ 257 /* 258 * DDE byte count exceeds the limit specified in Maximum 259 * byte count register. 260 */ 261 CSB_ERR(csb, "DDE byte count exceeds the limit"); 262 return -EINVAL; 263 264 /* these should not happen */ 265 case CSB_CC_INVALID_ALIGN: 266 /* setup_ddl should have detected this */ 267 CSB_ERR_ADDR(csb, "Invalid alignment"); 268 return -EINVAL; 269 case CSB_CC_DATA_LENGTH: 270 /* setup_ddl should have detected this */ 271 CSB_ERR(csb, "Invalid data length"); 272 return -EINVAL; 273 case CSB_CC_WR_TRANSLATION: 274 case CSB_CC_TRANSLATION: 275 case CSB_CC_TRANSLATION_DUP1: 276 case CSB_CC_TRANSLATION_DUP2: 277 case CSB_CC_TRANSLATION_DUP3: 278 case CSB_CC_TRANSLATION_DUP4: 279 case CSB_CC_TRANSLATION_DUP5: 280 case CSB_CC_TRANSLATION_DUP6: 281 /* should not happen, we use physical addrs */ 282 CSB_ERR_ADDR(csb, "Translation error"); 283 return -EPROTO; 284 case CSB_CC_WR_PROTECTION: 285 case CSB_CC_PROTECTION: 286 case CSB_CC_PROTECTION_DUP1: 287 case CSB_CC_PROTECTION_DUP2: 288 case CSB_CC_PROTECTION_DUP3: 289 case CSB_CC_PROTECTION_DUP4: 290 case CSB_CC_PROTECTION_DUP5: 291 case CSB_CC_PROTECTION_DUP6: 292 /* should not happen, we use physical addrs */ 293 CSB_ERR_ADDR(csb, "Protection error"); 294 return -EPROTO; 295 case CSB_CC_PRIVILEGE: 296 /* shouldn't happen, we're in HYP mode */ 297 CSB_ERR(csb, "Insufficient Privilege error"); 298 return -EPROTO; 299 case CSB_CC_EXCESSIVE_DDE: 300 /* shouldn't happen, setup_ddl doesn't use many dde's */ 301 CSB_ERR(csb, "Too many DDEs in DDL"); 302 return -EINVAL; 303 case CSB_CC_TRANSPORT: 304 case CSB_CC_INVALID_CRB: /* P9 or later */ 305 /* shouldn't happen, we setup CRB correctly */ 306 CSB_ERR(csb, "Invalid CRB"); 307 return -EINVAL; 308 case CSB_CC_INVALID_DDE: /* P9 or later */ 309 /* 310 * shouldn't happen, setup_direct/indirect_dde creates 311 * DDE right 312 */ 313 CSB_ERR(csb, "Invalid DDE"); 314 return -EINVAL; 315 case CSB_CC_SEGMENTED_DDL: 316 /* shouldn't happen, setup_ddl creates DDL right */ 317 CSB_ERR(csb, "Segmented DDL error"); 318 return -EINVAL; 319 case CSB_CC_DDE_OVERFLOW: 320 /* shouldn't happen, setup_ddl creates DDL right */ 321 CSB_ERR(csb, "DDE overflow error"); 322 return -EINVAL; 323 case CSB_CC_SESSION: 324 /* should not happen with ICSWX */ 325 CSB_ERR(csb, "Session violation error"); 326 return -EPROTO; 327 case CSB_CC_CHAIN: 328 /* should not happen, we don't use chained CRBs */ 329 CSB_ERR(csb, "Chained CRB error"); 330 return -EPROTO; 331 case CSB_CC_SEQUENCE: 332 /* should not happen, we don't use chained CRBs */ 333 CSB_ERR(csb, "CRB sequence number error"); 334 return -EPROTO; 335 case CSB_CC_UNKNOWN_CODE: 336 CSB_ERR(csb, "Unknown subfunction code"); 337 return -EPROTO; 338 339 /* hardware errors */ 340 case CSB_CC_RD_EXTERNAL: 341 case CSB_CC_RD_EXTERNAL_DUP1: 342 case CSB_CC_RD_EXTERNAL_DUP2: 343 case CSB_CC_RD_EXTERNAL_DUP3: 344 CSB_ERR_ADDR(csb, "Read error outside coprocessor"); 345 return -EPROTO; 346 case CSB_CC_WR_EXTERNAL: 347 CSB_ERR_ADDR(csb, "Write error outside coprocessor"); 348 return -EPROTO; 349 case CSB_CC_INTERNAL: 350 CSB_ERR(csb, "Internal error in coprocessor"); 351 return -EPROTO; 352 case CSB_CC_PROVISION: 353 CSB_ERR(csb, "Storage provision error"); 354 return -EPROTO; 355 case CSB_CC_HW: 356 CSB_ERR(csb, "Correctable hardware error"); 357 return -EPROTO; 358 case CSB_CC_HW_EXPIRED_TIMER: /* P9 or later */ 359 CSB_ERR(csb, "Job did not finish within allowed time"); 360 return -EPROTO; 361 362 default: 363 CSB_ERR(csb, "Invalid CC %d", csb->cc); 364 return -EPROTO; 365 } 366 367 /* check Completion Extension state */ 368 if (csb->ce & CSB_CE_TERMINATION) { 369 CSB_ERR(csb, "CSB request was terminated"); 370 return -EPROTO; 371 } 372 if (csb->ce & CSB_CE_INCOMPLETE) { 373 CSB_ERR(csb, "CSB request not complete"); 374 return -EPROTO; 375 } 376 if (!(csb->ce & CSB_CE_TPBC)) { 377 CSB_ERR(csb, "TPBC not provided, unknown target length"); 378 return -EPROTO; 379 } 380 381 /* successful completion */ 382 pr_debug_ratelimited("Processed %u bytes in %lu us\n", 383 be32_to_cpu(csb->count), 384 (unsigned long)ktime_us_delta(now, start)); 385 386 return 0; 387 } 388 389 static int nx842_config_crb(const unsigned char *in, unsigned int inlen, 390 unsigned char *out, unsigned int outlen, 391 struct nx842_workmem *wmem) 392 { 393 struct coprocessor_request_block *crb; 394 struct coprocessor_status_block *csb; 395 u64 csb_addr; 396 int ret; 397 398 crb = &wmem->crb; 399 csb = &crb->csb; 400 401 /* Clear any previous values */ 402 memset(crb, 0, sizeof(*crb)); 403 404 /* set up DDLs */ 405 ret = setup_ddl(&crb->source, wmem->ddl_in, 406 (unsigned char *)in, inlen, true); 407 if (ret) 408 return ret; 409 410 ret = setup_ddl(&crb->target, wmem->ddl_out, 411 out, outlen, false); 412 if (ret) 413 return ret; 414 415 /* set up CRB's CSB addr */ 416 csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS; 417 csb_addr |= CRB_CSB_AT; /* Addrs are phys */ 418 crb->csb_addr = cpu_to_be64(csb_addr); 419 420 return 0; 421 } 422 423 /** 424 * nx842_exec_icswx - compress/decompress data using the 842 algorithm 425 * 426 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. 427 * This compresses or decompresses the provided input buffer into the provided 428 * output buffer. 429 * 430 * Upon return from this function @outlen contains the length of the 431 * output data. If there is an error then @outlen will be 0 and an 432 * error will be specified by the return code from this function. 433 * 434 * The @workmem buffer should only be used by one function call at a time. 435 * 436 * @in: input buffer pointer 437 * @inlen: input buffer size 438 * @out: output buffer pointer 439 * @outlenp: output buffer size pointer 440 * @workmem: working memory buffer pointer, size determined by 441 * nx842_powernv_driver.workmem_size 442 * @fc: function code, see CCW Function Codes in nx-842.h 443 * 444 * Returns: 445 * 0 Success, output of length @outlenp stored in the buffer at @out 446 * -ENODEV Hardware unavailable 447 * -ENOSPC Output buffer is to small 448 * -EMSGSIZE Input buffer too large 449 * -EINVAL buffer constraints do not fix nx842_constraints 450 * -EPROTO hardware error during operation 451 * -ETIMEDOUT hardware did not complete operation in reasonable time 452 * -EINTR operation was aborted 453 */ 454 static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen, 455 unsigned char *out, unsigned int *outlenp, 456 void *workmem, int fc) 457 { 458 struct coprocessor_request_block *crb; 459 struct coprocessor_status_block *csb; 460 struct nx842_workmem *wmem; 461 int ret; 462 u32 ccw; 463 unsigned int outlen = *outlenp; 464 465 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); 466 467 *outlenp = 0; 468 469 /* shoudn't happen, we don't load without a coproc */ 470 if (!nx842_ct) { 471 pr_err_ratelimited("coprocessor CT is 0"); 472 return -ENODEV; 473 } 474 475 ret = nx842_config_crb(in, inlen, out, outlen, wmem); 476 if (ret) 477 return ret; 478 479 crb = &wmem->crb; 480 csb = &crb->csb; 481 482 /* set up CCW */ 483 ccw = 0; 484 ccw = SET_FIELD(CCW_CT, ccw, nx842_ct); 485 ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */ 486 ccw = SET_FIELD(CCW_FC_842, ccw, fc); 487 488 wmem->start = ktime_get(); 489 490 /* do ICSWX */ 491 ret = icswx(cpu_to_be32(ccw), crb); 492 493 pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret, 494 (unsigned int)ccw, 495 (unsigned int)be32_to_cpu(crb->ccw)); 496 497 /* 498 * NX842 coprocessor sets 3rd bit in CR register with XER[S0]. 499 * XER[S0] is the integer summary overflow bit which is nothing 500 * to do NX. Since this bit can be set with other return values, 501 * mask this bit. 502 */ 503 ret &= ~ICSWX_XERS0; 504 505 switch (ret) { 506 case ICSWX_INITIATED: 507 ret = wait_for_csb(wmem, csb); 508 break; 509 case ICSWX_BUSY: 510 pr_debug_ratelimited("842 Coprocessor busy\n"); 511 ret = -EBUSY; 512 break; 513 case ICSWX_REJECTED: 514 pr_err_ratelimited("ICSWX rejected\n"); 515 ret = -EPROTO; 516 break; 517 } 518 519 if (!ret) 520 *outlenp = be32_to_cpu(csb->count); 521 522 return ret; 523 } 524 525 /** 526 * nx842_exec_vas - compress/decompress data using the 842 algorithm 527 * 528 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. 529 * This compresses or decompresses the provided input buffer into the provided 530 * output buffer. 531 * 532 * Upon return from this function @outlen contains the length of the 533 * output data. If there is an error then @outlen will be 0 and an 534 * error will be specified by the return code from this function. 535 * 536 * The @workmem buffer should only be used by one function call at a time. 537 * 538 * @in: input buffer pointer 539 * @inlen: input buffer size 540 * @out: output buffer pointer 541 * @outlenp: output buffer size pointer 542 * @workmem: working memory buffer pointer, size determined by 543 * nx842_powernv_driver.workmem_size 544 * @fc: function code, see CCW Function Codes in nx-842.h 545 * 546 * Returns: 547 * 0 Success, output of length @outlenp stored in the buffer 548 * at @out 549 * -ENODEV Hardware unavailable 550 * -ENOSPC Output buffer is to small 551 * -EMSGSIZE Input buffer too large 552 * -EINVAL buffer constraints do not fix nx842_constraints 553 * -EPROTO hardware error during operation 554 * -ETIMEDOUT hardware did not complete operation in reasonable time 555 * -EINTR operation was aborted 556 */ 557 static int nx842_exec_vas(const unsigned char *in, unsigned int inlen, 558 unsigned char *out, unsigned int *outlenp, 559 void *workmem, int fc) 560 { 561 struct coprocessor_request_block *crb; 562 struct coprocessor_status_block *csb; 563 struct nx842_workmem *wmem; 564 struct vas_window *txwin; 565 int ret, i = 0; 566 u32 ccw; 567 unsigned int outlen = *outlenp; 568 569 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); 570 571 *outlenp = 0; 572 573 crb = &wmem->crb; 574 csb = &crb->csb; 575 576 ret = nx842_config_crb(in, inlen, out, outlen, wmem); 577 if (ret) 578 return ret; 579 580 ccw = 0; 581 ccw = SET_FIELD(CCW_FC_842, ccw, fc); 582 crb->ccw = cpu_to_be32(ccw); 583 584 do { 585 wmem->start = ktime_get(); 586 preempt_disable(); 587 txwin = this_cpu_read(cpu_txwin); 588 589 /* 590 * VAS copy CRB into L2 cache. Refer <asm/vas.h>. 591 * @crb and @offset. 592 */ 593 vas_copy_crb(crb, 0); 594 595 /* 596 * VAS paste previously copied CRB to NX. 597 * @txwin, @offset and @last (must be true). 598 */ 599 ret = vas_paste_crb(txwin, 0, 1); 600 preempt_enable(); 601 /* 602 * Retry copy/paste function for VAS failures. 603 */ 604 } while (ret && (i++ < VAS_RETRIES)); 605 606 if (ret) { 607 pr_err_ratelimited("VAS copy/paste failed\n"); 608 return ret; 609 } 610 611 ret = wait_for_csb(wmem, csb); 612 if (!ret) 613 *outlenp = be32_to_cpu(csb->count); 614 615 return ret; 616 } 617 618 /** 619 * nx842_powernv_compress - Compress data using the 842 algorithm 620 * 621 * Compression provided by the NX842 coprocessor on IBM PowerNV systems. 622 * The input buffer is compressed and the result is stored in the 623 * provided output buffer. 624 * 625 * Upon return from this function @outlen contains the length of the 626 * compressed data. If there is an error then @outlen will be 0 and an 627 * error will be specified by the return code from this function. 628 * 629 * @in: input buffer pointer 630 * @inlen: input buffer size 631 * @out: output buffer pointer 632 * @outlenp: output buffer size pointer 633 * @wmem: working memory buffer pointer, size determined by 634 * nx842_powernv_driver.workmem_size 635 * 636 * Returns: see @nx842_powernv_exec() 637 */ 638 static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen, 639 unsigned char *out, unsigned int *outlenp, 640 void *wmem) 641 { 642 return nx842_powernv_exec(in, inlen, out, outlenp, 643 wmem, CCW_FC_842_COMP_CRC); 644 } 645 646 /** 647 * nx842_powernv_decompress - Decompress data using the 842 algorithm 648 * 649 * Decompression provided by the NX842 coprocessor on IBM PowerNV systems. 650 * The input buffer is decompressed and the result is stored in the 651 * provided output buffer. 652 * 653 * Upon return from this function @outlen contains the length of the 654 * decompressed data. If there is an error then @outlen will be 0 and an 655 * error will be specified by the return code from this function. 656 * 657 * @in: input buffer pointer 658 * @inlen: input buffer size 659 * @out: output buffer pointer 660 * @outlenp: output buffer size pointer 661 * @wmem: working memory buffer pointer, size determined by 662 * nx842_powernv_driver.workmem_size 663 * 664 * Returns: see @nx842_powernv_exec() 665 */ 666 static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen, 667 unsigned char *out, unsigned int *outlenp, 668 void *wmem) 669 { 670 return nx842_powernv_exec(in, inlen, out, outlenp, 671 wmem, CCW_FC_842_DECOMP_CRC); 672 } 673 674 static inline void nx_add_coprocs_list(struct nx_coproc *coproc, 675 int chipid) 676 { 677 coproc->chip_id = chipid; 678 INIT_LIST_HEAD(&coproc->list); 679 list_add(&coproc->list, &nx_coprocs); 680 } 681 682 static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc) 683 { 684 struct vas_window *txwin = NULL; 685 struct vas_tx_win_attr txattr; 686 687 /* 688 * Kernel requests will be high priority. So open send 689 * windows only for high priority RxFIFO entries. 690 */ 691 vas_init_tx_win_attr(&txattr, coproc->ct); 692 txattr.lpid = 0; /* lpid is 0 for kernel requests */ 693 694 /* 695 * Open a VAS send window which is used to send request to NX. 696 */ 697 txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr); 698 if (IS_ERR(txwin)) 699 pr_err("ibm,nx-842: Can not open TX window: %ld\n", 700 PTR_ERR(txwin)); 701 702 return txwin; 703 } 704 705 /* 706 * Identify chip ID for each CPU, open send wndow for the corresponding NX 707 * engine and save txwin in percpu cpu_txwin. 708 * cpu_txwin is used in copy/paste operation for each compression / 709 * decompression request. 710 */ 711 static int nx_open_percpu_txwins(void) 712 { 713 struct nx_coproc *coproc, *n; 714 unsigned int i, chip_id; 715 716 for_each_possible_cpu(i) { 717 struct vas_window *txwin = NULL; 718 719 chip_id = cpu_to_chip_id(i); 720 721 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) { 722 /* 723 * Kernel requests use only high priority FIFOs. So 724 * open send windows for these FIFOs. 725 * GZIP is not supported in kernel right now. 726 */ 727 728 if (coproc->ct != VAS_COP_TYPE_842_HIPRI) 729 continue; 730 731 if (coproc->chip_id == chip_id) { 732 txwin = nx_alloc_txwin(coproc); 733 if (IS_ERR(txwin)) 734 return PTR_ERR(txwin); 735 736 per_cpu(cpu_txwin, i) = txwin; 737 break; 738 } 739 } 740 741 if (!per_cpu(cpu_txwin, i)) { 742 /* shouldn't happen, Each chip will have NX engine */ 743 pr_err("NX engine is not available for CPU %d\n", i); 744 return -EINVAL; 745 } 746 } 747 748 return 0; 749 } 750 751 static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority, 752 int high, int normal) 753 { 754 if (!strcmp(priority, "High")) 755 coproc->ct = high; 756 else if (!strcmp(priority, "Normal")) 757 coproc->ct = normal; 758 else { 759 pr_err("Invalid RxFIFO priority value\n"); 760 return -EINVAL; 761 } 762 763 return 0; 764 } 765 766 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, 767 int vasid, int type, int *ct) 768 { 769 struct vas_window *rxwin = NULL; 770 struct vas_rx_win_attr rxattr; 771 u32 lpid, pid, tid, fifo_size; 772 struct nx_coproc *coproc; 773 u64 rx_fifo; 774 const char *priority; 775 int ret; 776 777 ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo); 778 if (ret) { 779 pr_err("Missing rx-fifo-address property\n"); 780 return ret; 781 } 782 783 ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size); 784 if (ret) { 785 pr_err("Missing rx-fifo-size property\n"); 786 return ret; 787 } 788 789 ret = of_property_read_u32(dn, "lpid", &lpid); 790 if (ret) { 791 pr_err("Missing lpid property\n"); 792 return ret; 793 } 794 795 ret = of_property_read_u32(dn, "pid", &pid); 796 if (ret) { 797 pr_err("Missing pid property\n"); 798 return ret; 799 } 800 801 ret = of_property_read_u32(dn, "tid", &tid); 802 if (ret) { 803 pr_err("Missing tid property\n"); 804 return ret; 805 } 806 807 ret = of_property_read_string(dn, "priority", &priority); 808 if (ret) { 809 pr_err("Missing priority property\n"); 810 return ret; 811 } 812 813 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL); 814 if (!coproc) 815 return -ENOMEM; 816 817 if (type == NX_CT_842) 818 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI, 819 VAS_COP_TYPE_842); 820 else if (type == NX_CT_GZIP) 821 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI, 822 VAS_COP_TYPE_GZIP); 823 824 if (ret) 825 goto err_out; 826 827 vas_init_rx_win_attr(&rxattr, coproc->ct); 828 rxattr.rx_fifo = rx_fifo; 829 rxattr.rx_fifo_size = fifo_size; 830 rxattr.lnotify_lpid = lpid; 831 rxattr.lnotify_pid = pid; 832 rxattr.lnotify_tid = tid; 833 /* 834 * Maximum RX window credits can not be more than #CRBs in 835 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns. 836 */ 837 rxattr.wcreds_max = fifo_size / CRB_SIZE; 838 839 /* 840 * Open a VAS receice window which is used to configure RxFIFO 841 * for NX. 842 */ 843 rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr); 844 if (IS_ERR(rxwin)) { 845 ret = PTR_ERR(rxwin); 846 pr_err("setting RxFIFO with VAS failed: %d\n", 847 ret); 848 goto err_out; 849 } 850 851 coproc->vas.rxwin = rxwin; 852 coproc->vas.id = vasid; 853 nx_add_coprocs_list(coproc, chip_id); 854 855 /* 856 * (lpid, pid, tid) combination has to be unique for each 857 * coprocessor instance in the system. So to make it 858 * unique, skiboot uses coprocessor type such as 842 or 859 * GZIP for pid and provides this value to kernel in pid 860 * device-tree property. 861 */ 862 *ct = pid; 863 864 return 0; 865 866 err_out: 867 kfree(coproc); 868 return ret; 869 } 870 871 static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip) 872 { 873 int ret = 0; 874 875 if (opal_check_token(OPAL_NX_COPROC_INIT)) { 876 ret = opal_nx_coproc_init(chip_id, ct_842); 877 878 if (!ret) 879 ret = opal_nx_coproc_init(chip_id, ct_gzip); 880 881 if (ret) { 882 ret = opal_error_code(ret); 883 pr_err("Failed to initialize NX for chip(%d): %d\n", 884 chip_id, ret); 885 } 886 } else 887 pr_warn("Firmware doesn't support NX initialization\n"); 888 889 return ret; 890 } 891 892 static int __init find_nx_device_tree(struct device_node *dn, int chip_id, 893 int vasid, int type, char *devname, 894 int *ct) 895 { 896 int ret = 0; 897 898 if (of_device_is_compatible(dn, devname)) { 899 ret = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct); 900 if (ret) 901 of_node_put(dn); 902 } 903 904 return ret; 905 } 906 907 static int __init nx_powernv_probe_vas(struct device_node *pn) 908 { 909 int chip_id, vasid, ret = 0; 910 int ct_842 = 0, ct_gzip = 0; 911 struct device_node *dn; 912 913 chip_id = of_get_ibm_chip_id(pn); 914 if (chip_id < 0) { 915 pr_err("ibm,chip-id missing\n"); 916 return -EINVAL; 917 } 918 919 vasid = chip_to_vas_id(chip_id); 920 if (vasid < 0) { 921 pr_err("Unable to map chip_id %d to vasid\n", chip_id); 922 return -EINVAL; 923 } 924 925 for_each_child_of_node(pn, dn) { 926 ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842, 927 "ibm,p9-nx-842", &ct_842); 928 929 if (!ret) 930 ret = find_nx_device_tree(dn, chip_id, vasid, 931 NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip); 932 933 if (ret) { 934 of_node_put(dn); 935 return ret; 936 } 937 } 938 939 if (!ct_842 || !ct_gzip) { 940 pr_err("NX FIFO nodes are missing\n"); 941 return -EINVAL; 942 } 943 944 /* 945 * Initialize NX instance for both high and normal priority FIFOs. 946 */ 947 ret = nx_coproc_init(chip_id, ct_842, ct_gzip); 948 949 return ret; 950 } 951 952 static int __init nx842_powernv_probe(struct device_node *dn) 953 { 954 struct nx_coproc *coproc; 955 unsigned int ct, ci; 956 int chip_id; 957 958 chip_id = of_get_ibm_chip_id(dn); 959 if (chip_id < 0) { 960 pr_err("ibm,chip-id missing\n"); 961 return -EINVAL; 962 } 963 964 if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) { 965 pr_err("ibm,842-coprocessor-type missing\n"); 966 return -EINVAL; 967 } 968 969 if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) { 970 pr_err("ibm,842-coprocessor-instance missing\n"); 971 return -EINVAL; 972 } 973 974 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL); 975 if (!coproc) 976 return -ENOMEM; 977 978 coproc->ct = ct; 979 coproc->ci = ci; 980 nx_add_coprocs_list(coproc, chip_id); 981 982 pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci); 983 984 if (!nx842_ct) 985 nx842_ct = ct; 986 else if (nx842_ct != ct) 987 pr_err("NX842 chip %d, CT %d != first found CT %d\n", 988 chip_id, ct, nx842_ct); 989 990 return 0; 991 } 992 993 static void nx_delete_coprocs(void) 994 { 995 struct nx_coproc *coproc, *n; 996 struct vas_window *txwin; 997 int i; 998 999 /* 1000 * close percpu txwins that are opened for the corresponding coproc. 1001 */ 1002 for_each_possible_cpu(i) { 1003 txwin = per_cpu(cpu_txwin, i); 1004 if (txwin) 1005 vas_win_close(txwin); 1006 1007 per_cpu(cpu_txwin, i) = NULL; 1008 } 1009 1010 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) { 1011 if (coproc->vas.rxwin) 1012 vas_win_close(coproc->vas.rxwin); 1013 1014 list_del(&coproc->list); 1015 kfree(coproc); 1016 } 1017 } 1018 1019 static struct nx842_constraints nx842_powernv_constraints = { 1020 .alignment = DDE_BUFFER_ALIGN, 1021 .multiple = DDE_BUFFER_LAST_MULT, 1022 .minimum = DDE_BUFFER_LAST_MULT, 1023 .maximum = (DDL_LEN_MAX - 1) * PAGE_SIZE, 1024 }; 1025 1026 static struct nx842_driver nx842_powernv_driver = { 1027 .name = KBUILD_MODNAME, 1028 .owner = THIS_MODULE, 1029 .workmem_size = sizeof(struct nx842_workmem), 1030 .constraints = &nx842_powernv_constraints, 1031 .compress = nx842_powernv_compress, 1032 .decompress = nx842_powernv_decompress, 1033 }; 1034 1035 static void *nx842_powernv_crypto_alloc_ctx(void) 1036 { 1037 return nx842_crypto_alloc_ctx(&nx842_powernv_driver); 1038 } 1039 1040 static struct scomp_alg nx842_powernv_alg = { 1041 .base.cra_name = "842", 1042 .base.cra_driver_name = "842-nx", 1043 .base.cra_priority = 300, 1044 .base.cra_module = THIS_MODULE, 1045 1046 .alloc_ctx = nx842_powernv_crypto_alloc_ctx, 1047 .free_ctx = nx842_crypto_free_ctx, 1048 .compress = nx842_crypto_compress, 1049 .decompress = nx842_crypto_decompress, 1050 }; 1051 1052 static __init int nx_compress_powernv_init(void) 1053 { 1054 struct device_node *dn; 1055 int ret; 1056 1057 /* verify workmem size/align restrictions */ 1058 BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN); 1059 BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN); 1060 BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN); 1061 /* verify buffer size/align restrictions */ 1062 BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN); 1063 BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT); 1064 BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT); 1065 1066 for_each_compatible_node(dn, NULL, "ibm,power9-nx") { 1067 ret = nx_powernv_probe_vas(dn); 1068 if (ret) { 1069 nx_delete_coprocs(); 1070 of_node_put(dn); 1071 return ret; 1072 } 1073 } 1074 1075 if (list_empty(&nx_coprocs)) { 1076 for_each_compatible_node(dn, NULL, "ibm,power-nx") 1077 nx842_powernv_probe(dn); 1078 1079 if (!nx842_ct) 1080 return -ENODEV; 1081 1082 nx842_powernv_exec = nx842_exec_icswx; 1083 } else { 1084 /* 1085 * Register VAS user space API for NX GZIP so 1086 * that user space can use GZIP engine. 1087 * Using high FIFO priority for kernel requests and 1088 * normal FIFO priority is assigned for userspace. 1089 * 842 compression is supported only in kernel. 1090 */ 1091 ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP, 1092 "nx-gzip"); 1093 1094 /* 1095 * GZIP is not supported in kernel right now. 1096 * So open tx windows only for 842. 1097 */ 1098 if (!ret) 1099 ret = nx_open_percpu_txwins(); 1100 1101 if (ret) { 1102 nx_delete_coprocs(); 1103 return ret; 1104 } 1105 1106 nx842_powernv_exec = nx842_exec_vas; 1107 } 1108 1109 ret = crypto_register_scomp(&nx842_powernv_alg); 1110 if (ret) { 1111 nx_delete_coprocs(); 1112 return ret; 1113 } 1114 1115 return 0; 1116 } 1117 module_init(nx_compress_powernv_init); 1118 1119 static void __exit nx_compress_powernv_exit(void) 1120 { 1121 /* 1122 * GZIP engine is supported only in power9 or later and nx842_ct 1123 * is used on power8 (icswx). 1124 * VAS API for NX GZIP is registered during init for user space 1125 * use. So delete this API use for GZIP engine. 1126 */ 1127 if (!nx842_ct) 1128 vas_unregister_api_powernv(); 1129 1130 crypto_unregister_scomp(&nx842_powernv_alg); 1131 1132 nx_delete_coprocs(); 1133 } 1134 module_exit(nx_compress_powernv_exit); 1135