1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Driver for IBM PowerNV compression accelerator 4 * 5 * Copyright (C) 2015 Dan Streetman, IBM Corp 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include "nx-842.h" 11 12 #include <linux/timer.h> 13 14 #include <asm/prom.h> 15 #include <asm/icswx.h> 16 #include <asm/vas.h> 17 #include <asm/reg.h> 18 #include <asm/opal-api.h> 19 #include <asm/opal.h> 20 21 MODULE_LICENSE("GPL"); 22 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); 23 MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors"); 24 MODULE_ALIAS_CRYPTO("842"); 25 MODULE_ALIAS_CRYPTO("842-nx"); 26 27 #define WORKMEM_ALIGN (CRB_ALIGN) 28 #define CSB_WAIT_MAX (5000) /* ms */ 29 #define VAS_RETRIES (10) 30 31 struct nx842_workmem { 32 /* Below fields must be properly aligned */ 33 struct coprocessor_request_block crb; /* CRB_ALIGN align */ 34 struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */ 35 struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */ 36 /* Above fields must be properly aligned */ 37 38 ktime_t start; 39 40 char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */ 41 } __packed __aligned(WORKMEM_ALIGN); 42 43 struct nx_coproc { 44 unsigned int chip_id; 45 unsigned int ct; /* Can be 842 or GZIP high/normal*/ 46 unsigned int ci; /* Coprocessor instance, used with icswx */ 47 struct { 48 struct vas_window *rxwin; 49 int id; 50 } vas; 51 struct list_head list; 52 }; 53 54 /* 55 * Send the request to NX engine on the chip for the corresponding CPU 56 * where the process is executing. Use with VAS function. 57 */ 58 static DEFINE_PER_CPU(struct vas_window *, cpu_txwin); 59 60 /* no cpu hotplug on powernv, so this list never changes after init */ 61 static LIST_HEAD(nx_coprocs); 62 static unsigned int nx842_ct; /* used in icswx function */ 63 64 /* 65 * Using same values as in skiboot or coprocessor type representing 66 * in NX workbook. 67 */ 68 #define NX_CT_GZIP (2) /* on P9 and later */ 69 #define NX_CT_842 (3) 70 71 static int (*nx842_powernv_exec)(const unsigned char *in, 72 unsigned int inlen, unsigned char *out, 73 unsigned int *outlenp, void *workmem, int fc); 74 75 /** 76 * setup_indirect_dde - Setup an indirect DDE 77 * 78 * The DDE is setup with the DDE count, byte count, and address of 79 * first direct DDE in the list. 80 */ 81 static void setup_indirect_dde(struct data_descriptor_entry *dde, 82 struct data_descriptor_entry *ddl, 83 unsigned int dde_count, unsigned int byte_count) 84 { 85 dde->flags = 0; 86 dde->count = dde_count; 87 dde->index = 0; 88 dde->length = cpu_to_be32(byte_count); 89 dde->address = cpu_to_be64(nx842_get_pa(ddl)); 90 } 91 92 /** 93 * setup_direct_dde - Setup single DDE from buffer 94 * 95 * The DDE is setup with the buffer and length. The buffer must be properly 96 * aligned. The used length is returned. 97 * Returns: 98 * N Successfully set up DDE with N bytes 99 */ 100 static unsigned int setup_direct_dde(struct data_descriptor_entry *dde, 101 unsigned long pa, unsigned int len) 102 { 103 unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa)); 104 105 dde->flags = 0; 106 dde->count = 0; 107 dde->index = 0; 108 dde->length = cpu_to_be32(l); 109 dde->address = cpu_to_be64(pa); 110 111 return l; 112 } 113 114 /** 115 * setup_ddl - Setup DDL from buffer 116 * 117 * Returns: 118 * 0 Successfully set up DDL 119 */ 120 static int setup_ddl(struct data_descriptor_entry *dde, 121 struct data_descriptor_entry *ddl, 122 unsigned char *buf, unsigned int len, 123 bool in) 124 { 125 unsigned long pa = nx842_get_pa(buf); 126 int i, ret, total_len = len; 127 128 if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) { 129 pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n", 130 in ? "input" : "output", pa, DDE_BUFFER_ALIGN); 131 return -EINVAL; 132 } 133 134 /* only need to check last mult; since buffer must be 135 * DDE_BUFFER_ALIGN aligned, and that is a multiple of 136 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers 137 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT. 138 */ 139 if (len % DDE_BUFFER_LAST_MULT) { 140 pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n", 141 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT); 142 if (in) 143 return -EINVAL; 144 len = round_down(len, DDE_BUFFER_LAST_MULT); 145 } 146 147 /* use a single direct DDE */ 148 if (len <= LEN_ON_PAGE(pa)) { 149 ret = setup_direct_dde(dde, pa, len); 150 WARN_ON(ret < len); 151 return 0; 152 } 153 154 /* use the DDL */ 155 for (i = 0; i < DDL_LEN_MAX && len > 0; i++) { 156 ret = setup_direct_dde(&ddl[i], pa, len); 157 buf += ret; 158 len -= ret; 159 pa = nx842_get_pa(buf); 160 } 161 162 if (len > 0) { 163 pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n", 164 total_len, in ? "input" : "output", len); 165 if (in) 166 return -EMSGSIZE; 167 total_len -= len; 168 } 169 setup_indirect_dde(dde, ddl, i, total_len); 170 171 return 0; 172 } 173 174 #define CSB_ERR(csb, msg, ...) \ 175 pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n", \ 176 ##__VA_ARGS__, (csb)->flags, \ 177 (csb)->cs, (csb)->cc, (csb)->ce, \ 178 be32_to_cpu((csb)->count)) 179 180 #define CSB_ERR_ADDR(csb, msg, ...) \ 181 CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__, \ 182 (unsigned long)be64_to_cpu((csb)->address)) 183 184 /** 185 * wait_for_csb 186 */ 187 static int wait_for_csb(struct nx842_workmem *wmem, 188 struct coprocessor_status_block *csb) 189 { 190 ktime_t start = wmem->start, now = ktime_get(); 191 ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX); 192 193 while (!(READ_ONCE(csb->flags) & CSB_V)) { 194 cpu_relax(); 195 now = ktime_get(); 196 if (ktime_after(now, timeout)) 197 break; 198 } 199 200 /* hw has updated csb and output buffer */ 201 barrier(); 202 203 /* check CSB flags */ 204 if (!(csb->flags & CSB_V)) { 205 CSB_ERR(csb, "CSB still not valid after %ld us, giving up", 206 (long)ktime_us_delta(now, start)); 207 return -ETIMEDOUT; 208 } 209 if (csb->flags & CSB_F) { 210 CSB_ERR(csb, "Invalid CSB format"); 211 return -EPROTO; 212 } 213 if (csb->flags & CSB_CH) { 214 CSB_ERR(csb, "Invalid CSB chaining state"); 215 return -EPROTO; 216 } 217 218 /* verify CSB completion sequence is 0 */ 219 if (csb->cs) { 220 CSB_ERR(csb, "Invalid CSB completion sequence"); 221 return -EPROTO; 222 } 223 224 /* check CSB Completion Code */ 225 switch (csb->cc) { 226 /* no error */ 227 case CSB_CC_SUCCESS: 228 break; 229 case CSB_CC_TPBC_GT_SPBC: 230 /* not an error, but the compressed data is 231 * larger than the uncompressed data :( 232 */ 233 break; 234 235 /* input data errors */ 236 case CSB_CC_OPERAND_OVERLAP: 237 /* input and output buffers overlap */ 238 CSB_ERR(csb, "Operand Overlap error"); 239 return -EINVAL; 240 case CSB_CC_INVALID_OPERAND: 241 CSB_ERR(csb, "Invalid operand"); 242 return -EINVAL; 243 case CSB_CC_NOSPC: 244 /* output buffer too small */ 245 return -ENOSPC; 246 case CSB_CC_ABORT: 247 CSB_ERR(csb, "Function aborted"); 248 return -EINTR; 249 case CSB_CC_CRC_MISMATCH: 250 CSB_ERR(csb, "CRC mismatch"); 251 return -EINVAL; 252 case CSB_CC_TEMPL_INVALID: 253 CSB_ERR(csb, "Compressed data template invalid"); 254 return -EINVAL; 255 case CSB_CC_TEMPL_OVERFLOW: 256 CSB_ERR(csb, "Compressed data template shows data past end"); 257 return -EINVAL; 258 case CSB_CC_EXCEED_BYTE_COUNT: /* P9 or later */ 259 /* 260 * DDE byte count exceeds the limit specified in Maximum 261 * byte count register. 262 */ 263 CSB_ERR(csb, "DDE byte count exceeds the limit"); 264 return -EINVAL; 265 266 /* these should not happen */ 267 case CSB_CC_INVALID_ALIGN: 268 /* setup_ddl should have detected this */ 269 CSB_ERR_ADDR(csb, "Invalid alignment"); 270 return -EINVAL; 271 case CSB_CC_DATA_LENGTH: 272 /* setup_ddl should have detected this */ 273 CSB_ERR(csb, "Invalid data length"); 274 return -EINVAL; 275 case CSB_CC_WR_TRANSLATION: 276 case CSB_CC_TRANSLATION: 277 case CSB_CC_TRANSLATION_DUP1: 278 case CSB_CC_TRANSLATION_DUP2: 279 case CSB_CC_TRANSLATION_DUP3: 280 case CSB_CC_TRANSLATION_DUP4: 281 case CSB_CC_TRANSLATION_DUP5: 282 case CSB_CC_TRANSLATION_DUP6: 283 /* should not happen, we use physical addrs */ 284 CSB_ERR_ADDR(csb, "Translation error"); 285 return -EPROTO; 286 case CSB_CC_WR_PROTECTION: 287 case CSB_CC_PROTECTION: 288 case CSB_CC_PROTECTION_DUP1: 289 case CSB_CC_PROTECTION_DUP2: 290 case CSB_CC_PROTECTION_DUP3: 291 case CSB_CC_PROTECTION_DUP4: 292 case CSB_CC_PROTECTION_DUP5: 293 case CSB_CC_PROTECTION_DUP6: 294 /* should not happen, we use physical addrs */ 295 CSB_ERR_ADDR(csb, "Protection error"); 296 return -EPROTO; 297 case CSB_CC_PRIVILEGE: 298 /* shouldn't happen, we're in HYP mode */ 299 CSB_ERR(csb, "Insufficient Privilege error"); 300 return -EPROTO; 301 case CSB_CC_EXCESSIVE_DDE: 302 /* shouldn't happen, setup_ddl doesn't use many dde's */ 303 CSB_ERR(csb, "Too many DDEs in DDL"); 304 return -EINVAL; 305 case CSB_CC_TRANSPORT: 306 case CSB_CC_INVALID_CRB: /* P9 or later */ 307 /* shouldn't happen, we setup CRB correctly */ 308 CSB_ERR(csb, "Invalid CRB"); 309 return -EINVAL; 310 case CSB_CC_INVALID_DDE: /* P9 or later */ 311 /* 312 * shouldn't happen, setup_direct/indirect_dde creates 313 * DDE right 314 */ 315 CSB_ERR(csb, "Invalid DDE"); 316 return -EINVAL; 317 case CSB_CC_SEGMENTED_DDL: 318 /* shouldn't happen, setup_ddl creates DDL right */ 319 CSB_ERR(csb, "Segmented DDL error"); 320 return -EINVAL; 321 case CSB_CC_DDE_OVERFLOW: 322 /* shouldn't happen, setup_ddl creates DDL right */ 323 CSB_ERR(csb, "DDE overflow error"); 324 return -EINVAL; 325 case CSB_CC_SESSION: 326 /* should not happen with ICSWX */ 327 CSB_ERR(csb, "Session violation error"); 328 return -EPROTO; 329 case CSB_CC_CHAIN: 330 /* should not happen, we don't use chained CRBs */ 331 CSB_ERR(csb, "Chained CRB error"); 332 return -EPROTO; 333 case CSB_CC_SEQUENCE: 334 /* should not happen, we don't use chained CRBs */ 335 CSB_ERR(csb, "CRB sequence number error"); 336 return -EPROTO; 337 case CSB_CC_UNKNOWN_CODE: 338 CSB_ERR(csb, "Unknown subfunction code"); 339 return -EPROTO; 340 341 /* hardware errors */ 342 case CSB_CC_RD_EXTERNAL: 343 case CSB_CC_RD_EXTERNAL_DUP1: 344 case CSB_CC_RD_EXTERNAL_DUP2: 345 case CSB_CC_RD_EXTERNAL_DUP3: 346 CSB_ERR_ADDR(csb, "Read error outside coprocessor"); 347 return -EPROTO; 348 case CSB_CC_WR_EXTERNAL: 349 CSB_ERR_ADDR(csb, "Write error outside coprocessor"); 350 return -EPROTO; 351 case CSB_CC_INTERNAL: 352 CSB_ERR(csb, "Internal error in coprocessor"); 353 return -EPROTO; 354 case CSB_CC_PROVISION: 355 CSB_ERR(csb, "Storage provision error"); 356 return -EPROTO; 357 case CSB_CC_HW: 358 CSB_ERR(csb, "Correctable hardware error"); 359 return -EPROTO; 360 case CSB_CC_HW_EXPIRED_TIMER: /* P9 or later */ 361 CSB_ERR(csb, "Job did not finish within allowed time"); 362 return -EPROTO; 363 364 default: 365 CSB_ERR(csb, "Invalid CC %d", csb->cc); 366 return -EPROTO; 367 } 368 369 /* check Completion Extension state */ 370 if (csb->ce & CSB_CE_TERMINATION) { 371 CSB_ERR(csb, "CSB request was terminated"); 372 return -EPROTO; 373 } 374 if (csb->ce & CSB_CE_INCOMPLETE) { 375 CSB_ERR(csb, "CSB request not complete"); 376 return -EPROTO; 377 } 378 if (!(csb->ce & CSB_CE_TPBC)) { 379 CSB_ERR(csb, "TPBC not provided, unknown target length"); 380 return -EPROTO; 381 } 382 383 /* successful completion */ 384 pr_debug_ratelimited("Processed %u bytes in %lu us\n", 385 be32_to_cpu(csb->count), 386 (unsigned long)ktime_us_delta(now, start)); 387 388 return 0; 389 } 390 391 static int nx842_config_crb(const unsigned char *in, unsigned int inlen, 392 unsigned char *out, unsigned int outlen, 393 struct nx842_workmem *wmem) 394 { 395 struct coprocessor_request_block *crb; 396 struct coprocessor_status_block *csb; 397 u64 csb_addr; 398 int ret; 399 400 crb = &wmem->crb; 401 csb = &crb->csb; 402 403 /* Clear any previous values */ 404 memset(crb, 0, sizeof(*crb)); 405 406 /* set up DDLs */ 407 ret = setup_ddl(&crb->source, wmem->ddl_in, 408 (unsigned char *)in, inlen, true); 409 if (ret) 410 return ret; 411 412 ret = setup_ddl(&crb->target, wmem->ddl_out, 413 out, outlen, false); 414 if (ret) 415 return ret; 416 417 /* set up CRB's CSB addr */ 418 csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS; 419 csb_addr |= CRB_CSB_AT; /* Addrs are phys */ 420 crb->csb_addr = cpu_to_be64(csb_addr); 421 422 return 0; 423 } 424 425 /** 426 * nx842_exec_icswx - compress/decompress data using the 842 algorithm 427 * 428 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. 429 * This compresses or decompresses the provided input buffer into the provided 430 * output buffer. 431 * 432 * Upon return from this function @outlen contains the length of the 433 * output data. If there is an error then @outlen will be 0 and an 434 * error will be specified by the return code from this function. 435 * 436 * The @workmem buffer should only be used by one function call at a time. 437 * 438 * @in: input buffer pointer 439 * @inlen: input buffer size 440 * @out: output buffer pointer 441 * @outlenp: output buffer size pointer 442 * @workmem: working memory buffer pointer, size determined by 443 * nx842_powernv_driver.workmem_size 444 * @fc: function code, see CCW Function Codes in nx-842.h 445 * 446 * Returns: 447 * 0 Success, output of length @outlenp stored in the buffer at @out 448 * -ENODEV Hardware unavailable 449 * -ENOSPC Output buffer is to small 450 * -EMSGSIZE Input buffer too large 451 * -EINVAL buffer constraints do not fix nx842_constraints 452 * -EPROTO hardware error during operation 453 * -ETIMEDOUT hardware did not complete operation in reasonable time 454 * -EINTR operation was aborted 455 */ 456 static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen, 457 unsigned char *out, unsigned int *outlenp, 458 void *workmem, int fc) 459 { 460 struct coprocessor_request_block *crb; 461 struct coprocessor_status_block *csb; 462 struct nx842_workmem *wmem; 463 int ret; 464 u32 ccw; 465 unsigned int outlen = *outlenp; 466 467 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); 468 469 *outlenp = 0; 470 471 /* shoudn't happen, we don't load without a coproc */ 472 if (!nx842_ct) { 473 pr_err_ratelimited("coprocessor CT is 0"); 474 return -ENODEV; 475 } 476 477 ret = nx842_config_crb(in, inlen, out, outlen, wmem); 478 if (ret) 479 return ret; 480 481 crb = &wmem->crb; 482 csb = &crb->csb; 483 484 /* set up CCW */ 485 ccw = 0; 486 ccw = SET_FIELD(CCW_CT, ccw, nx842_ct); 487 ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */ 488 ccw = SET_FIELD(CCW_FC_842, ccw, fc); 489 490 wmem->start = ktime_get(); 491 492 /* do ICSWX */ 493 ret = icswx(cpu_to_be32(ccw), crb); 494 495 pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret, 496 (unsigned int)ccw, 497 (unsigned int)be32_to_cpu(crb->ccw)); 498 499 /* 500 * NX842 coprocessor sets 3rd bit in CR register with XER[S0]. 501 * XER[S0] is the integer summary overflow bit which is nothing 502 * to do NX. Since this bit can be set with other return values, 503 * mask this bit. 504 */ 505 ret &= ~ICSWX_XERS0; 506 507 switch (ret) { 508 case ICSWX_INITIATED: 509 ret = wait_for_csb(wmem, csb); 510 break; 511 case ICSWX_BUSY: 512 pr_debug_ratelimited("842 Coprocessor busy\n"); 513 ret = -EBUSY; 514 break; 515 case ICSWX_REJECTED: 516 pr_err_ratelimited("ICSWX rejected\n"); 517 ret = -EPROTO; 518 break; 519 } 520 521 if (!ret) 522 *outlenp = be32_to_cpu(csb->count); 523 524 return ret; 525 } 526 527 /** 528 * nx842_exec_vas - compress/decompress data using the 842 algorithm 529 * 530 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. 531 * This compresses or decompresses the provided input buffer into the provided 532 * output buffer. 533 * 534 * Upon return from this function @outlen contains the length of the 535 * output data. If there is an error then @outlen will be 0 and an 536 * error will be specified by the return code from this function. 537 * 538 * The @workmem buffer should only be used by one function call at a time. 539 * 540 * @in: input buffer pointer 541 * @inlen: input buffer size 542 * @out: output buffer pointer 543 * @outlenp: output buffer size pointer 544 * @workmem: working memory buffer pointer, size determined by 545 * nx842_powernv_driver.workmem_size 546 * @fc: function code, see CCW Function Codes in nx-842.h 547 * 548 * Returns: 549 * 0 Success, output of length @outlenp stored in the buffer 550 * at @out 551 * -ENODEV Hardware unavailable 552 * -ENOSPC Output buffer is to small 553 * -EMSGSIZE Input buffer too large 554 * -EINVAL buffer constraints do not fix nx842_constraints 555 * -EPROTO hardware error during operation 556 * -ETIMEDOUT hardware did not complete operation in reasonable time 557 * -EINTR operation was aborted 558 */ 559 static int nx842_exec_vas(const unsigned char *in, unsigned int inlen, 560 unsigned char *out, unsigned int *outlenp, 561 void *workmem, int fc) 562 { 563 struct coprocessor_request_block *crb; 564 struct coprocessor_status_block *csb; 565 struct nx842_workmem *wmem; 566 struct vas_window *txwin; 567 int ret, i = 0; 568 u32 ccw; 569 unsigned int outlen = *outlenp; 570 571 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); 572 573 *outlenp = 0; 574 575 crb = &wmem->crb; 576 csb = &crb->csb; 577 578 ret = nx842_config_crb(in, inlen, out, outlen, wmem); 579 if (ret) 580 return ret; 581 582 ccw = 0; 583 ccw = SET_FIELD(CCW_FC_842, ccw, fc); 584 crb->ccw = cpu_to_be32(ccw); 585 586 do { 587 wmem->start = ktime_get(); 588 preempt_disable(); 589 txwin = this_cpu_read(cpu_txwin); 590 591 /* 592 * VAS copy CRB into L2 cache. Refer <asm/vas.h>. 593 * @crb and @offset. 594 */ 595 vas_copy_crb(crb, 0); 596 597 /* 598 * VAS paste previously copied CRB to NX. 599 * @txwin, @offset and @last (must be true). 600 */ 601 ret = vas_paste_crb(txwin, 0, 1); 602 preempt_enable(); 603 /* 604 * Retry copy/paste function for VAS failures. 605 */ 606 } while (ret && (i++ < VAS_RETRIES)); 607 608 if (ret) { 609 pr_err_ratelimited("VAS copy/paste failed\n"); 610 return ret; 611 } 612 613 ret = wait_for_csb(wmem, csb); 614 if (!ret) 615 *outlenp = be32_to_cpu(csb->count); 616 617 return ret; 618 } 619 620 /** 621 * nx842_powernv_compress - Compress data using the 842 algorithm 622 * 623 * Compression provided by the NX842 coprocessor on IBM PowerNV systems. 624 * The input buffer is compressed and the result is stored in the 625 * provided output buffer. 626 * 627 * Upon return from this function @outlen contains the length of the 628 * compressed data. If there is an error then @outlen will be 0 and an 629 * error will be specified by the return code from this function. 630 * 631 * @in: input buffer pointer 632 * @inlen: input buffer size 633 * @out: output buffer pointer 634 * @outlenp: output buffer size pointer 635 * @workmem: working memory buffer pointer, size determined by 636 * nx842_powernv_driver.workmem_size 637 * 638 * Returns: see @nx842_powernv_exec() 639 */ 640 static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen, 641 unsigned char *out, unsigned int *outlenp, 642 void *wmem) 643 { 644 return nx842_powernv_exec(in, inlen, out, outlenp, 645 wmem, CCW_FC_842_COMP_CRC); 646 } 647 648 /** 649 * nx842_powernv_decompress - Decompress data using the 842 algorithm 650 * 651 * Decompression provided by the NX842 coprocessor on IBM PowerNV systems. 652 * The input buffer is decompressed and the result is stored in the 653 * provided output buffer. 654 * 655 * Upon return from this function @outlen contains the length of the 656 * decompressed data. If there is an error then @outlen will be 0 and an 657 * error will be specified by the return code from this function. 658 * 659 * @in: input buffer pointer 660 * @inlen: input buffer size 661 * @out: output buffer pointer 662 * @outlenp: output buffer size pointer 663 * @wmem: working memory buffer pointer, size determined by 664 * nx842_powernv_driver.workmem_size 665 * 666 * Returns: see @nx842_powernv_exec() 667 */ 668 static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen, 669 unsigned char *out, unsigned int *outlenp, 670 void *wmem) 671 { 672 return nx842_powernv_exec(in, inlen, out, outlenp, 673 wmem, CCW_FC_842_DECOMP_CRC); 674 } 675 676 static inline void nx_add_coprocs_list(struct nx_coproc *coproc, 677 int chipid) 678 { 679 coproc->chip_id = chipid; 680 INIT_LIST_HEAD(&coproc->list); 681 list_add(&coproc->list, &nx_coprocs); 682 } 683 684 static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc) 685 { 686 struct vas_window *txwin = NULL; 687 struct vas_tx_win_attr txattr; 688 689 /* 690 * Kernel requests will be high priority. So open send 691 * windows only for high priority RxFIFO entries. 692 */ 693 vas_init_tx_win_attr(&txattr, coproc->ct); 694 txattr.lpid = 0; /* lpid is 0 for kernel requests */ 695 696 /* 697 * Open a VAS send window which is used to send request to NX. 698 */ 699 txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr); 700 if (IS_ERR(txwin)) 701 pr_err("ibm,nx-842: Can not open TX window: %ld\n", 702 PTR_ERR(txwin)); 703 704 return txwin; 705 } 706 707 /* 708 * Identify chip ID for each CPU, open send wndow for the corresponding NX 709 * engine and save txwin in percpu cpu_txwin. 710 * cpu_txwin is used in copy/paste operation for each compression / 711 * decompression request. 712 */ 713 static int nx_open_percpu_txwins(void) 714 { 715 struct nx_coproc *coproc, *n; 716 unsigned int i, chip_id; 717 718 for_each_possible_cpu(i) { 719 struct vas_window *txwin = NULL; 720 721 chip_id = cpu_to_chip_id(i); 722 723 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) { 724 /* 725 * Kernel requests use only high priority FIFOs. So 726 * open send windows for these FIFOs. 727 * GZIP is not supported in kernel right now. 728 */ 729 730 if (coproc->ct != VAS_COP_TYPE_842_HIPRI) 731 continue; 732 733 if (coproc->chip_id == chip_id) { 734 txwin = nx_alloc_txwin(coproc); 735 if (IS_ERR(txwin)) 736 return PTR_ERR(txwin); 737 738 per_cpu(cpu_txwin, i) = txwin; 739 break; 740 } 741 } 742 743 if (!per_cpu(cpu_txwin, i)) { 744 /* shouldn't happen, Each chip will have NX engine */ 745 pr_err("NX engine is not available for CPU %d\n", i); 746 return -EINVAL; 747 } 748 } 749 750 return 0; 751 } 752 753 static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority, 754 int high, int normal) 755 { 756 if (!strcmp(priority, "High")) 757 coproc->ct = high; 758 else if (!strcmp(priority, "Normal")) 759 coproc->ct = normal; 760 else { 761 pr_err("Invalid RxFIFO priority value\n"); 762 return -EINVAL; 763 } 764 765 return 0; 766 } 767 768 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, 769 int vasid, int type, int *ct) 770 { 771 struct vas_window *rxwin = NULL; 772 struct vas_rx_win_attr rxattr; 773 u32 lpid, pid, tid, fifo_size; 774 struct nx_coproc *coproc; 775 u64 rx_fifo; 776 const char *priority; 777 int ret; 778 779 ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo); 780 if (ret) { 781 pr_err("Missing rx-fifo-address property\n"); 782 return ret; 783 } 784 785 ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size); 786 if (ret) { 787 pr_err("Missing rx-fifo-size property\n"); 788 return ret; 789 } 790 791 ret = of_property_read_u32(dn, "lpid", &lpid); 792 if (ret) { 793 pr_err("Missing lpid property\n"); 794 return ret; 795 } 796 797 ret = of_property_read_u32(dn, "pid", &pid); 798 if (ret) { 799 pr_err("Missing pid property\n"); 800 return ret; 801 } 802 803 ret = of_property_read_u32(dn, "tid", &tid); 804 if (ret) { 805 pr_err("Missing tid property\n"); 806 return ret; 807 } 808 809 ret = of_property_read_string(dn, "priority", &priority); 810 if (ret) { 811 pr_err("Missing priority property\n"); 812 return ret; 813 } 814 815 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL); 816 if (!coproc) 817 return -ENOMEM; 818 819 if (type == NX_CT_842) 820 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI, 821 VAS_COP_TYPE_842); 822 else if (type == NX_CT_GZIP) 823 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI, 824 VAS_COP_TYPE_GZIP); 825 826 if (ret) 827 goto err_out; 828 829 vas_init_rx_win_attr(&rxattr, coproc->ct); 830 rxattr.rx_fifo = rx_fifo; 831 rxattr.rx_fifo_size = fifo_size; 832 rxattr.lnotify_lpid = lpid; 833 rxattr.lnotify_pid = pid; 834 rxattr.lnotify_tid = tid; 835 /* 836 * Maximum RX window credits can not be more than #CRBs in 837 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns. 838 */ 839 rxattr.wcreds_max = fifo_size / CRB_SIZE; 840 841 /* 842 * Open a VAS receice window which is used to configure RxFIFO 843 * for NX. 844 */ 845 rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr); 846 if (IS_ERR(rxwin)) { 847 ret = PTR_ERR(rxwin); 848 pr_err("setting RxFIFO with VAS failed: %d\n", 849 ret); 850 goto err_out; 851 } 852 853 coproc->vas.rxwin = rxwin; 854 coproc->vas.id = vasid; 855 nx_add_coprocs_list(coproc, chip_id); 856 857 /* 858 * (lpid, pid, tid) combination has to be unique for each 859 * coprocessor instance in the system. So to make it 860 * unique, skiboot uses coprocessor type such as 842 or 861 * GZIP for pid and provides this value to kernel in pid 862 * device-tree property. 863 */ 864 *ct = pid; 865 866 return 0; 867 868 err_out: 869 kfree(coproc); 870 return ret; 871 } 872 873 static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip) 874 { 875 int ret = 0; 876 877 if (opal_check_token(OPAL_NX_COPROC_INIT)) { 878 ret = opal_nx_coproc_init(chip_id, ct_842); 879 880 if (!ret) 881 ret = opal_nx_coproc_init(chip_id, ct_gzip); 882 883 if (ret) { 884 ret = opal_error_code(ret); 885 pr_err("Failed to initialize NX for chip(%d): %d\n", 886 chip_id, ret); 887 } 888 } else 889 pr_warn("Firmware doesn't support NX initialization\n"); 890 891 return ret; 892 } 893 894 static int __init find_nx_device_tree(struct device_node *dn, int chip_id, 895 int vasid, int type, char *devname, 896 int *ct) 897 { 898 int ret = 0; 899 900 if (of_device_is_compatible(dn, devname)) { 901 ret = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct); 902 if (ret) 903 of_node_put(dn); 904 } 905 906 return ret; 907 } 908 909 static int __init nx_powernv_probe_vas(struct device_node *pn) 910 { 911 int chip_id, vasid, ret = 0; 912 int ct_842 = 0, ct_gzip = 0; 913 struct device_node *dn; 914 915 chip_id = of_get_ibm_chip_id(pn); 916 if (chip_id < 0) { 917 pr_err("ibm,chip-id missing\n"); 918 return -EINVAL; 919 } 920 921 vasid = chip_to_vas_id(chip_id); 922 if (vasid < 0) { 923 pr_err("Unable to map chip_id %d to vasid\n", chip_id); 924 return -EINVAL; 925 } 926 927 for_each_child_of_node(pn, dn) { 928 ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842, 929 "ibm,p9-nx-842", &ct_842); 930 931 if (!ret) 932 ret = find_nx_device_tree(dn, chip_id, vasid, 933 NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip); 934 935 if (ret) { 936 of_node_put(dn); 937 return ret; 938 } 939 } 940 941 if (!ct_842 || !ct_gzip) { 942 pr_err("NX FIFO nodes are missing\n"); 943 return -EINVAL; 944 } 945 946 /* 947 * Initialize NX instance for both high and normal priority FIFOs. 948 */ 949 ret = nx_coproc_init(chip_id, ct_842, ct_gzip); 950 951 return ret; 952 } 953 954 static int __init nx842_powernv_probe(struct device_node *dn) 955 { 956 struct nx_coproc *coproc; 957 unsigned int ct, ci; 958 int chip_id; 959 960 chip_id = of_get_ibm_chip_id(dn); 961 if (chip_id < 0) { 962 pr_err("ibm,chip-id missing\n"); 963 return -EINVAL; 964 } 965 966 if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) { 967 pr_err("ibm,842-coprocessor-type missing\n"); 968 return -EINVAL; 969 } 970 971 if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) { 972 pr_err("ibm,842-coprocessor-instance missing\n"); 973 return -EINVAL; 974 } 975 976 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL); 977 if (!coproc) 978 return -ENOMEM; 979 980 coproc->ct = ct; 981 coproc->ci = ci; 982 nx_add_coprocs_list(coproc, chip_id); 983 984 pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci); 985 986 if (!nx842_ct) 987 nx842_ct = ct; 988 else if (nx842_ct != ct) 989 pr_err("NX842 chip %d, CT %d != first found CT %d\n", 990 chip_id, ct, nx842_ct); 991 992 return 0; 993 } 994 995 static void nx_delete_coprocs(void) 996 { 997 struct nx_coproc *coproc, *n; 998 struct vas_window *txwin; 999 int i; 1000 1001 /* 1002 * close percpu txwins that are opened for the corresponding coproc. 1003 */ 1004 for_each_possible_cpu(i) { 1005 txwin = per_cpu(cpu_txwin, i); 1006 if (txwin) 1007 vas_win_close(txwin); 1008 1009 per_cpu(cpu_txwin, i) = NULL; 1010 } 1011 1012 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) { 1013 if (coproc->vas.rxwin) 1014 vas_win_close(coproc->vas.rxwin); 1015 1016 list_del(&coproc->list); 1017 kfree(coproc); 1018 } 1019 } 1020 1021 static struct nx842_constraints nx842_powernv_constraints = { 1022 .alignment = DDE_BUFFER_ALIGN, 1023 .multiple = DDE_BUFFER_LAST_MULT, 1024 .minimum = DDE_BUFFER_LAST_MULT, 1025 .maximum = (DDL_LEN_MAX - 1) * PAGE_SIZE, 1026 }; 1027 1028 static struct nx842_driver nx842_powernv_driver = { 1029 .name = KBUILD_MODNAME, 1030 .owner = THIS_MODULE, 1031 .workmem_size = sizeof(struct nx842_workmem), 1032 .constraints = &nx842_powernv_constraints, 1033 .compress = nx842_powernv_compress, 1034 .decompress = nx842_powernv_decompress, 1035 }; 1036 1037 static int nx842_powernv_crypto_init(struct crypto_tfm *tfm) 1038 { 1039 return nx842_crypto_init(tfm, &nx842_powernv_driver); 1040 } 1041 1042 static struct crypto_alg nx842_powernv_alg = { 1043 .cra_name = "842", 1044 .cra_driver_name = "842-nx", 1045 .cra_priority = 300, 1046 .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, 1047 .cra_ctxsize = sizeof(struct nx842_crypto_ctx), 1048 .cra_module = THIS_MODULE, 1049 .cra_init = nx842_powernv_crypto_init, 1050 .cra_exit = nx842_crypto_exit, 1051 .cra_u = { .compress = { 1052 .coa_compress = nx842_crypto_compress, 1053 .coa_decompress = nx842_crypto_decompress } } 1054 }; 1055 1056 static __init int nx_compress_powernv_init(void) 1057 { 1058 struct device_node *dn; 1059 int ret; 1060 1061 /* verify workmem size/align restrictions */ 1062 BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN); 1063 BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN); 1064 BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN); 1065 /* verify buffer size/align restrictions */ 1066 BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN); 1067 BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT); 1068 BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT); 1069 1070 for_each_compatible_node(dn, NULL, "ibm,power9-nx") { 1071 ret = nx_powernv_probe_vas(dn); 1072 if (ret) { 1073 nx_delete_coprocs(); 1074 of_node_put(dn); 1075 return ret; 1076 } 1077 } 1078 1079 if (list_empty(&nx_coprocs)) { 1080 for_each_compatible_node(dn, NULL, "ibm,power-nx") 1081 nx842_powernv_probe(dn); 1082 1083 if (!nx842_ct) 1084 return -ENODEV; 1085 1086 nx842_powernv_exec = nx842_exec_icswx; 1087 } else { 1088 /* 1089 * Register VAS user space API for NX GZIP so 1090 * that user space can use GZIP engine. 1091 * Using high FIFO priority for kernel requests and 1092 * normal FIFO priority is assigned for userspace. 1093 * 842 compression is supported only in kernel. 1094 */ 1095 ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP, 1096 "nx-gzip"); 1097 1098 /* 1099 * GZIP is not supported in kernel right now. 1100 * So open tx windows only for 842. 1101 */ 1102 if (!ret) 1103 ret = nx_open_percpu_txwins(); 1104 1105 if (ret) { 1106 nx_delete_coprocs(); 1107 return ret; 1108 } 1109 1110 nx842_powernv_exec = nx842_exec_vas; 1111 } 1112 1113 ret = crypto_register_alg(&nx842_powernv_alg); 1114 if (ret) { 1115 nx_delete_coprocs(); 1116 return ret; 1117 } 1118 1119 return 0; 1120 } 1121 module_init(nx_compress_powernv_init); 1122 1123 static void __exit nx_compress_powernv_exit(void) 1124 { 1125 /* 1126 * GZIP engine is supported only in power9 or later and nx842_ct 1127 * is used on power8 (icswx). 1128 * VAS API for NX GZIP is registered during init for user space 1129 * use. So delete this API use for GZIP engine. 1130 */ 1131 if (!nx842_ct) 1132 vas_unregister_api_powernv(); 1133 1134 crypto_unregister_alg(&nx842_powernv_alg); 1135 1136 nx_delete_coprocs(); 1137 } 1138 module_exit(nx_compress_powernv_exit); 1139