1 /* 2 * Driver for IBM Power 842 compression accelerator 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 * 18 * Copyright (C) IBM Corporation, 2012 19 * 20 * Authors: Robert Jennings <rcj@linux.vnet.ibm.com> 21 * Seth Jennings <sjenning@linux.vnet.ibm.com> 22 */ 23 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/nx842.h> 27 #include <linux/of.h> 28 #include <linux/slab.h> 29 30 #include <asm/page.h> 31 #include <asm/vio.h> 32 33 #include "nx_csbcpb.h" /* struct nx_csbcpb */ 34 35 #define MODULE_NAME "nx-compress" 36 MODULE_LICENSE("GPL"); 37 MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>"); 38 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); 39 40 #define SHIFT_4K 12 41 #define SHIFT_64K 16 42 #define SIZE_4K (1UL << SHIFT_4K) 43 #define SIZE_64K (1UL << SHIFT_64K) 44 45 /* IO buffer must be 128 byte aligned */ 46 #define IO_BUFFER_ALIGN 128 47 48 struct nx842_header { 49 int blocks_nr; /* number of compressed blocks */ 50 int offset; /* offset of the first block (from beginning of header) */ 51 int sizes[0]; /* size of compressed blocks */ 52 }; 53 54 static inline int nx842_header_size(const struct nx842_header *hdr) 55 { 56 return sizeof(struct nx842_header) + 57 hdr->blocks_nr * sizeof(hdr->sizes[0]); 58 } 59 60 /* 
Macros for fields within nx_csbcpb */ 61 /* Check the valid bit within the csbcpb valid field */ 62 #define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) 63 64 /* CE macros operate on the completion_extension field bits in the csbcpb. 65 * CE0 0=full completion, 1=partial completion 66 * CE1 0=CE0 indicates completion, 1=termination (output may be modified) 67 * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ 68 #define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) 69 #define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) 70 #define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) 71 72 /* The NX unit accepts data only on 4K page boundaries */ 73 #define NX842_HW_PAGE_SHIFT SHIFT_4K 74 #define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT) 75 #define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) 76 77 enum nx842_status { 78 UNAVAILABLE, 79 AVAILABLE 80 }; 81 82 struct ibm_nx842_counters { 83 atomic64_t comp_complete; 84 atomic64_t comp_failed; 85 atomic64_t decomp_complete; 86 atomic64_t decomp_failed; 87 atomic64_t swdecomp; 88 atomic64_t comp_times[32]; 89 atomic64_t decomp_times[32]; 90 }; 91 92 static struct nx842_devdata { 93 struct vio_dev *vdev; 94 struct device *dev; 95 struct ibm_nx842_counters *counters; 96 unsigned int max_sg_len; 97 unsigned int max_sync_size; 98 unsigned int max_sync_sg; 99 enum nx842_status status; 100 } __rcu *devdata; 101 static DEFINE_SPINLOCK(devdata_mutex); 102 103 #define NX842_COUNTER_INC(_x) \ 104 static inline void nx842_inc_##_x( \ 105 const struct nx842_devdata *dev) { \ 106 if (dev) \ 107 atomic64_inc(&dev->counters->_x); \ 108 } 109 NX842_COUNTER_INC(comp_complete); 110 NX842_COUNTER_INC(comp_failed); 111 NX842_COUNTER_INC(decomp_complete); 112 NX842_COUNTER_INC(decomp_failed); 113 NX842_COUNTER_INC(swdecomp); 114 115 #define NX842_HIST_SLOTS 16 116 117 static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) 118 { 119 int bucket = fls(time); 120 121 if (bucket) 122 bucket = min((NX842_HIST_SLOTS - 1), 
bucket - 1); 123 124 atomic64_inc(×[bucket]); 125 } 126 127 /* NX unit operation flags */ 128 #define NX842_OP_COMPRESS 0x0 129 #define NX842_OP_CRC 0x1 130 #define NX842_OP_DECOMPRESS 0x2 131 #define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) 132 #define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) 133 #define NX842_OP_ASYNC (1<<23) 134 #define NX842_OP_NOTIFY (1<<22) 135 #define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) 136 137 static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) 138 { 139 /* No use of DMA mappings within the driver. */ 140 return 0; 141 } 142 143 struct nx842_slentry { 144 unsigned long ptr; /* Real address (use __pa()) */ 145 unsigned long len; 146 }; 147 148 /* pHyp scatterlist entry */ 149 struct nx842_scatterlist { 150 int entry_nr; /* number of slentries */ 151 struct nx842_slentry *entries; /* ptr to array of slentries */ 152 }; 153 154 /* Does not include sizeof(entry_nr) in the size */ 155 static inline unsigned long nx842_get_scatterlist_size( 156 struct nx842_scatterlist *sl) 157 { 158 return sl->entry_nr * sizeof(struct nx842_slentry); 159 } 160 161 static inline unsigned long nx842_get_pa(void *addr) 162 { 163 if (is_vmalloc_addr(addr)) 164 return page_to_phys(vmalloc_to_page(addr)) 165 + offset_in_page(addr); 166 else 167 return __pa(addr); 168 } 169 170 static int nx842_build_scatterlist(unsigned long buf, int len, 171 struct nx842_scatterlist *sl) 172 { 173 unsigned long nextpage; 174 struct nx842_slentry *entry; 175 176 sl->entry_nr = 0; 177 178 entry = sl->entries; 179 while (len) { 180 entry->ptr = nx842_get_pa((void *)buf); 181 nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE); 182 if (nextpage < buf + len) { 183 /* we aren't at the end yet */ 184 if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE)) 185 /* we are in the middle (or beginning) */ 186 entry->len = NX842_HW_PAGE_SIZE; 187 else 188 /* we are at the beginning */ 189 entry->len = nextpage - buf; 190 } else { 191 /* at the end */ 192 
entry->len = len; 193 } 194 195 len -= entry->len; 196 buf += entry->len; 197 sl->entry_nr++; 198 entry++; 199 } 200 201 return 0; 202 } 203 204 /* 205 * Working memory for software decompression 206 */ 207 struct sw842_fifo { 208 union { 209 char f8[256][8]; 210 char f4[512][4]; 211 }; 212 char f2[256][2]; 213 unsigned char f84_full; 214 unsigned char f2_full; 215 unsigned char f8_count; 216 unsigned char f2_count; 217 unsigned int f4_count; 218 }; 219 220 /* 221 * Working memory for crypto API 222 */ 223 struct nx842_workmem { 224 char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */ 225 union { 226 /* hardware working memory */ 227 struct { 228 /* scatterlist */ 229 char slin[SIZE_4K]; 230 char slout[SIZE_4K]; 231 /* coprocessor status/parameter block */ 232 struct nx_csbcpb csbcpb; 233 }; 234 /* software working memory */ 235 struct sw842_fifo swfifo; /* software decompression fifo */ 236 }; 237 }; 238 239 int nx842_get_workmem_size(void) 240 { 241 return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE; 242 } 243 EXPORT_SYMBOL_GPL(nx842_get_workmem_size); 244 245 int nx842_get_workmem_size_aligned(void) 246 { 247 return sizeof(struct nx842_workmem); 248 } 249 EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned); 250 251 static int nx842_validate_result(struct device *dev, 252 struct cop_status_block *csb) 253 { 254 /* The csb must be valid after returning from vio_h_cop_sync */ 255 if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { 256 dev_err(dev, "%s: cspcbp not valid upon completion.\n", 257 __func__); 258 dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", 259 csb->valid, 260 csb->crb_seq_number, 261 csb->completion_code, 262 csb->completion_extension); 263 dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", 264 csb->processed_byte_count, 265 (unsigned long)csb->address); 266 return -EIO; 267 } 268 269 /* Check return values from the hardware in the CSB */ 270 switch (csb->completion_code) { 271 case 0: /* Completed without error */ 272 
break; 273 case 64: /* Target bytes > Source bytes during compression */ 274 case 13: /* Output buffer too small */ 275 dev_dbg(dev, "%s: Compression output larger than input\n", 276 __func__); 277 return -ENOSPC; 278 case 66: /* Input data contains an illegal template field */ 279 case 67: /* Template indicates data past the end of the input stream */ 280 dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", 281 __func__, csb->completion_code); 282 return -EINVAL; 283 default: 284 dev_dbg(dev, "%s: Unspecified error (code:%d)\n", 285 __func__, csb->completion_code); 286 return -EIO; 287 } 288 289 /* Hardware sanity check */ 290 if (!NX842_CSBCPB_CE2(csb->completion_extension)) { 291 dev_err(dev, "%s: No error returned by hardware, but " 292 "data returned is unusable, contact support.\n" 293 "(Additional info: csbcbp->processed bytes " 294 "does not specify processed bytes for the " 295 "target buffer.)\n", __func__); 296 return -EIO; 297 } 298 299 return 0; 300 } 301 302 /** 303 * nx842_compress - Compress data using the 842 algorithm 304 * 305 * Compression provide by the NX842 coprocessor on IBM Power systems. 306 * The input buffer is compressed and the result is stored in the 307 * provided output buffer. 308 * 309 * Upon return from this function @outlen contains the length of the 310 * compressed data. If there is an error then @outlen will be 0 and an 311 * error will be specified by the return code from this function. 
312 * 313 * @in: Pointer to input buffer, must be page aligned 314 * @inlen: Length of input buffer, must be PAGE_SIZE 315 * @out: Pointer to output buffer 316 * @outlen: Length of output buffer 317 * @wrkmem: ptr to buffer for working memory, size determined by 318 * nx842_get_workmem_size() 319 * 320 * Returns: 321 * 0 Success, output of length @outlen stored in the buffer at @out 322 * -ENOMEM Unable to allocate internal buffers 323 * -ENOSPC Output buffer is to small 324 * -EMSGSIZE XXX Difficult to describe this limitation 325 * -EIO Internal error 326 * -ENODEV Hardware unavailable 327 */ 328 int nx842_compress(const unsigned char *in, unsigned int inlen, 329 unsigned char *out, unsigned int *outlen, void *wmem) 330 { 331 struct nx842_header *hdr; 332 struct nx842_devdata *local_devdata; 333 struct device *dev = NULL; 334 struct nx842_workmem *workmem; 335 struct nx842_scatterlist slin, slout; 336 struct nx_csbcpb *csbcpb; 337 int ret = 0, max_sync_size, i, bytesleft, size, hdrsize; 338 unsigned long inbuf, outbuf, padding; 339 struct vio_pfo_op op = { 340 .done = NULL, 341 .handle = 0, 342 .timeout = 0, 343 }; 344 unsigned long start_time = get_tb(); 345 346 /* 347 * Make sure input buffer is 64k page aligned. This is assumed since 348 * this driver is designed for page compression only (for now). This 349 * is very nice since we can now use direct DDE(s) for the input and 350 * the alignment is guaranteed. 
351 */ 352 inbuf = (unsigned long)in; 353 if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE) 354 return -EINVAL; 355 356 rcu_read_lock(); 357 local_devdata = rcu_dereference(devdata); 358 if (!local_devdata || !local_devdata->dev) { 359 rcu_read_unlock(); 360 return -ENODEV; 361 } 362 max_sync_size = local_devdata->max_sync_size; 363 dev = local_devdata->dev; 364 365 /* Create the header */ 366 hdr = (struct nx842_header *)out; 367 hdr->blocks_nr = PAGE_SIZE / max_sync_size; 368 hdrsize = nx842_header_size(hdr); 369 outbuf = (unsigned long)out + hdrsize; 370 bytesleft = *outlen - hdrsize; 371 372 /* Init scatterlist */ 373 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, 374 NX842_HW_PAGE_SIZE); 375 slin.entries = (struct nx842_slentry *)workmem->slin; 376 slout.entries = (struct nx842_slentry *)workmem->slout; 377 378 /* Init operation */ 379 op.flags = NX842_OP_COMPRESS; 380 csbcpb = &workmem->csbcpb; 381 memset(csbcpb, 0, sizeof(*csbcpb)); 382 op.csbcpb = nx842_get_pa(csbcpb); 383 op.out = nx842_get_pa(slout.entries); 384 385 for (i = 0; i < hdr->blocks_nr; i++) { 386 /* 387 * Aligning the output blocks to 128 bytes does waste space, 388 * but it prevents the need for bounce buffers and memory 389 * copies. It also simplifies the code a lot. In the worst 390 * case (64k page, 4k max_sync_size), you lose up to 391 * (128*16)/64k = ~3% the compression factor. For 64k 392 * max_sync_size, the loss would be at most 128/64k = ~0.2%. 393 */ 394 padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf; 395 outbuf += padding; 396 bytesleft -= padding; 397 if (i == 0) 398 /* save offset into first block in header */ 399 hdr->offset = padding + hdrsize; 400 401 if (bytesleft <= 0) { 402 ret = -ENOSPC; 403 goto unlock; 404 } 405 406 /* 407 * NOTE: If the default max_sync_size is changed from 4k 408 * to 64k, remove the "likely" case below, since a 409 * scatterlist will always be needed. 
410 */ 411 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { 412 /* Create direct DDE */ 413 op.in = nx842_get_pa((void *)inbuf); 414 op.inlen = max_sync_size; 415 416 } else { 417 /* Create indirect DDE (scatterlist) */ 418 nx842_build_scatterlist(inbuf, max_sync_size, &slin); 419 op.in = nx842_get_pa(slin.entries); 420 op.inlen = -nx842_get_scatterlist_size(&slin); 421 } 422 423 /* 424 * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect 425 * DDE is required for the outbuf. 426 * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must 427 * also be page aligned (1 in 128/4k=32 chance) in order 428 * to use a direct DDE. 429 * This is unlikely, just use an indirect DDE always. 430 */ 431 nx842_build_scatterlist(outbuf, 432 min(bytesleft, max_sync_size), &slout); 433 /* op.out set before loop */ 434 op.outlen = -nx842_get_scatterlist_size(&slout); 435 436 /* Send request to pHyp */ 437 ret = vio_h_cop_sync(local_devdata->vdev, &op); 438 439 /* Check for pHyp error */ 440 if (ret) { 441 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", 442 __func__, ret, op.hcall_err); 443 ret = -EIO; 444 goto unlock; 445 } 446 447 /* Check for hardware error */ 448 ret = nx842_validate_result(dev, &csbcpb->csb); 449 if (ret && ret != -ENOSPC) 450 goto unlock; 451 452 /* Handle incompressible data */ 453 if (unlikely(ret == -ENOSPC)) { 454 if (bytesleft < max_sync_size) { 455 /* 456 * Not enough space left in the output buffer 457 * to store uncompressed block 458 */ 459 goto unlock; 460 } else { 461 /* Store incompressible block */ 462 memcpy((void *)outbuf, (void *)inbuf, 463 max_sync_size); 464 hdr->sizes[i] = -max_sync_size; 465 outbuf += max_sync_size; 466 bytesleft -= max_sync_size; 467 /* Reset ret, incompressible data handled */ 468 ret = 0; 469 } 470 } else { 471 /* Normal case, compression was successful */ 472 size = csbcpb->csb.processed_byte_count; 473 dev_dbg(dev, "%s: processed_bytes=%d\n", 474 __func__, size); 475 hdr->sizes[i] = size; 476 outbuf += size; 
477 bytesleft -= size; 478 } 479 480 inbuf += max_sync_size; 481 } 482 483 *outlen = (unsigned int)(outbuf - (unsigned long)out); 484 485 unlock: 486 if (ret) 487 nx842_inc_comp_failed(local_devdata); 488 else { 489 nx842_inc_comp_complete(local_devdata); 490 ibm_nx842_incr_hist(local_devdata->counters->comp_times, 491 (get_tb() - start_time) / tb_ticks_per_usec); 492 } 493 rcu_read_unlock(); 494 return ret; 495 } 496 EXPORT_SYMBOL_GPL(nx842_compress); 497 498 static int sw842_decompress(const unsigned char *, int, unsigned char *, int *, 499 const void *); 500 501 /** 502 * nx842_decompress - Decompress data using the 842 algorithm 503 * 504 * Decompression provide by the NX842 coprocessor on IBM Power systems. 505 * The input buffer is decompressed and the result is stored in the 506 * provided output buffer. The size allocated to the output buffer is 507 * provided by the caller of this function in @outlen. Upon return from 508 * this function @outlen contains the length of the decompressed data. 509 * If there is an error then @outlen will be 0 and an error will be 510 * specified by the return code from this function. 
511 * 512 * @in: Pointer to input buffer, will use bounce buffer if not 128 byte 513 * aligned 514 * @inlen: Length of input buffer 515 * @out: Pointer to output buffer, must be page aligned 516 * @outlen: Length of output buffer, must be PAGE_SIZE 517 * @wrkmem: ptr to buffer for working memory, size determined by 518 * nx842_get_workmem_size() 519 * 520 * Returns: 521 * 0 Success, output of length @outlen stored in the buffer at @out 522 * -ENODEV Hardware decompression device is unavailable 523 * -ENOMEM Unable to allocate internal buffers 524 * -ENOSPC Output buffer is to small 525 * -EINVAL Bad input data encountered when attempting decompress 526 * -EIO Internal error 527 */ 528 int nx842_decompress(const unsigned char *in, unsigned int inlen, 529 unsigned char *out, unsigned int *outlen, void *wmem) 530 { 531 struct nx842_header *hdr; 532 struct nx842_devdata *local_devdata; 533 struct device *dev = NULL; 534 struct nx842_workmem *workmem; 535 struct nx842_scatterlist slin, slout; 536 struct nx_csbcpb *csbcpb; 537 int ret = 0, i, size, max_sync_size; 538 unsigned long inbuf, outbuf; 539 struct vio_pfo_op op = { 540 .done = NULL, 541 .handle = 0, 542 .timeout = 0, 543 }; 544 unsigned long start_time = get_tb(); 545 546 /* Ensure page alignment and size */ 547 outbuf = (unsigned long)out; 548 if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE) 549 return -EINVAL; 550 551 rcu_read_lock(); 552 local_devdata = rcu_dereference(devdata); 553 if (local_devdata) 554 dev = local_devdata->dev; 555 556 /* Get header */ 557 hdr = (struct nx842_header *)in; 558 559 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, 560 NX842_HW_PAGE_SIZE); 561 562 inbuf = (unsigned long)in + hdr->offset; 563 if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) { 564 /* Copy block(s) into bounce buffer for alignment */ 565 memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset); 566 inbuf = (unsigned long)workmem->bounce; 567 } 568 569 /* Init scatterlist */ 570 
slin.entries = (struct nx842_slentry *)workmem->slin; 571 slout.entries = (struct nx842_slentry *)workmem->slout; 572 573 /* Init operation */ 574 op.flags = NX842_OP_DECOMPRESS; 575 csbcpb = &workmem->csbcpb; 576 memset(csbcpb, 0, sizeof(*csbcpb)); 577 op.csbcpb = nx842_get_pa(csbcpb); 578 579 /* 580 * max_sync_size may have changed since compression, 581 * so we can't read it from the device info. We need 582 * to derive it from hdr->blocks_nr. 583 */ 584 max_sync_size = PAGE_SIZE / hdr->blocks_nr; 585 586 for (i = 0; i < hdr->blocks_nr; i++) { 587 /* Skip padding */ 588 inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN); 589 590 if (hdr->sizes[i] < 0) { 591 /* Negative sizes indicate uncompressed data blocks */ 592 size = abs(hdr->sizes[i]); 593 memcpy((void *)outbuf, (void *)inbuf, size); 594 outbuf += size; 595 inbuf += size; 596 continue; 597 } 598 599 if (!dev) 600 goto sw; 601 602 /* 603 * The better the compression, the more likely the "likely" 604 * case becomes. 605 */ 606 if (likely((inbuf & NX842_HW_PAGE_MASK) == 607 ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) { 608 /* Create direct DDE */ 609 op.in = nx842_get_pa((void *)inbuf); 610 op.inlen = hdr->sizes[i]; 611 } else { 612 /* Create indirect DDE (scatterlist) */ 613 nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin); 614 op.in = nx842_get_pa(slin.entries); 615 op.inlen = -nx842_get_scatterlist_size(&slin); 616 } 617 618 /* 619 * NOTE: If the default max_sync_size is changed from 4k 620 * to 64k, remove the "likely" case below, since a 621 * scatterlist will always be needed. 
622 */ 623 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { 624 /* Create direct DDE */ 625 op.out = nx842_get_pa((void *)outbuf); 626 op.outlen = max_sync_size; 627 } else { 628 /* Create indirect DDE (scatterlist) */ 629 nx842_build_scatterlist(outbuf, max_sync_size, &slout); 630 op.out = nx842_get_pa(slout.entries); 631 op.outlen = -nx842_get_scatterlist_size(&slout); 632 } 633 634 /* Send request to pHyp */ 635 ret = vio_h_cop_sync(local_devdata->vdev, &op); 636 637 /* Check for pHyp error */ 638 if (ret) { 639 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", 640 __func__, ret, op.hcall_err); 641 dev = NULL; 642 goto sw; 643 } 644 645 /* Check for hardware error */ 646 ret = nx842_validate_result(dev, &csbcpb->csb); 647 if (ret) { 648 dev = NULL; 649 goto sw; 650 } 651 652 /* HW decompression success */ 653 inbuf += hdr->sizes[i]; 654 outbuf += csbcpb->csb.processed_byte_count; 655 continue; 656 657 sw: 658 /* software decompression */ 659 size = max_sync_size; 660 ret = sw842_decompress( 661 (unsigned char *)inbuf, hdr->sizes[i], 662 (unsigned char *)outbuf, &size, wmem); 663 if (ret) 664 pr_debug("%s: sw842_decompress failed with %d\n", 665 __func__, ret); 666 667 if (ret) { 668 if (ret != -ENOSPC && ret != -EINVAL && 669 ret != -EMSGSIZE) 670 ret = -EIO; 671 goto unlock; 672 } 673 674 /* SW decompression success */ 675 inbuf += hdr->sizes[i]; 676 outbuf += size; 677 } 678 679 *outlen = (unsigned int)(outbuf - (unsigned long)out); 680 681 unlock: 682 if (ret) 683 /* decompress fail */ 684 nx842_inc_decomp_failed(local_devdata); 685 else { 686 if (!dev) 687 /* software decompress */ 688 nx842_inc_swdecomp(local_devdata); 689 nx842_inc_decomp_complete(local_devdata); 690 ibm_nx842_incr_hist(local_devdata->counters->decomp_times, 691 (get_tb() - start_time) / tb_ticks_per_usec); 692 } 693 694 rcu_read_unlock(); 695 return ret; 696 } 697 EXPORT_SYMBOL_GPL(nx842_decompress); 698 699 /** 700 * nx842_OF_set_defaults -- Set default (disabled) values 
for devdata 701 * 702 * @devdata - struct nx842_devdata to update 703 * 704 * Returns: 705 * 0 on success 706 * -ENOENT if @devdata ptr is NULL 707 */ 708 static int nx842_OF_set_defaults(struct nx842_devdata *devdata) 709 { 710 if (devdata) { 711 devdata->max_sync_size = 0; 712 devdata->max_sync_sg = 0; 713 devdata->max_sg_len = 0; 714 devdata->status = UNAVAILABLE; 715 return 0; 716 } else 717 return -ENOENT; 718 } 719 720 /** 721 * nx842_OF_upd_status -- Update the device info from OF status prop 722 * 723 * The status property indicates if the accelerator is enabled. If the 724 * device is in the OF tree it indicates that the hardware is present. 725 * The status field indicates if the device is enabled when the status 726 * is 'okay'. Otherwise the device driver will be disabled. 727 * 728 * @devdata - struct nx842_devdata to update 729 * @prop - struct property point containing the maxsyncop for the update 730 * 731 * Returns: 732 * 0 - Device is available 733 * -EINVAL - Device is not available 734 */ 735 static int nx842_OF_upd_status(struct nx842_devdata *devdata, 736 struct property *prop) { 737 int ret = 0; 738 const char *status = (const char *)prop->value; 739 740 if (!strncmp(status, "okay", (size_t)prop->length)) { 741 devdata->status = AVAILABLE; 742 } else { 743 dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n", 744 __func__, status); 745 devdata->status = UNAVAILABLE; 746 } 747 748 return ret; 749 } 750 751 /** 752 * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop 753 * 754 * Definition of the 'ibm,max-sg-len' OF property: 755 * This field indicates the maximum byte length of a scatter list 756 * for the platform facility. It is a single cell encoded as with encode-int. 757 * 758 * Example: 759 * # od -x ibm,max-sg-len 760 * 0000000 0000 0ff0 761 * 762 * In this example, the maximum byte length of a scatter list is 763 * 0x0ff0 (4,080). 
764 * 765 * @devdata - struct nx842_devdata to update 766 * @prop - struct property point containing the maxsyncop for the update 767 * 768 * Returns: 769 * 0 on success 770 * -EINVAL on failure 771 */ 772 static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, 773 struct property *prop) { 774 int ret = 0; 775 const int *maxsglen = prop->value; 776 777 if (prop->length != sizeof(*maxsglen)) { 778 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); 779 dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, 780 prop->length, sizeof(*maxsglen)); 781 ret = -EINVAL; 782 } else { 783 devdata->max_sg_len = (unsigned int)min(*maxsglen, 784 (int)NX842_HW_PAGE_SIZE); 785 } 786 787 return ret; 788 } 789 790 /** 791 * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop 792 * 793 * Definition of the 'ibm,max-sync-cop' OF property: 794 * Two series of cells. The first series of cells represents the maximums 795 * that can be synchronously compressed. The second series of cells 796 * represents the maximums that can be synchronously decompressed. 797 * 1. The first cell in each series contains the count of the number of 798 * data length, scatter list elements pairs that follow – each being 799 * of the form 800 * a. One cell data byte length 801 * b. One cell total number of scatter list elements 802 * 803 * Example: 804 * # od -x ibm,max-sync-cop 805 * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 806 * 0000020 0000 1000 0000 01fe 807 * 808 * In this example, compression supports 0x1000 (4,096) data byte length 809 * and 0x1fe (510) total scatter list elements. Decompression supports 810 * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list 811 * elements. 
812 * 813 * @devdata - struct nx842_devdata to update 814 * @prop - struct property point containing the maxsyncop for the update 815 * 816 * Returns: 817 * 0 on success 818 * -EINVAL on failure 819 */ 820 static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, 821 struct property *prop) { 822 int ret = 0; 823 const struct maxsynccop_t { 824 int comp_elements; 825 int comp_data_limit; 826 int comp_sg_limit; 827 int decomp_elements; 828 int decomp_data_limit; 829 int decomp_sg_limit; 830 } *maxsynccop; 831 832 if (prop->length != sizeof(*maxsynccop)) { 833 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); 834 dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, 835 sizeof(*maxsynccop)); 836 ret = -EINVAL; 837 goto out; 838 } 839 840 maxsynccop = (const struct maxsynccop_t *)prop->value; 841 842 /* Use one limit rather than separate limits for compression and 843 * decompression. Set a maximum for this so as not to exceed the 844 * size that the header can support and round the value down to 845 * the hardware page size (4K) */ 846 devdata->max_sync_size = 847 (unsigned int)min(maxsynccop->comp_data_limit, 848 maxsynccop->decomp_data_limit); 849 850 devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, 851 SIZE_64K); 852 853 if (devdata->max_sync_size < SIZE_4K) { 854 dev_err(devdata->dev, "%s: hardware max data size (%u) is " 855 "less than the driver minimum, unable to use " 856 "the hardware device\n", 857 __func__, devdata->max_sync_size); 858 ret = -EINVAL; 859 goto out; 860 } 861 862 devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit, 863 maxsynccop->decomp_sg_limit); 864 if (devdata->max_sync_sg < 1) { 865 dev_err(devdata->dev, "%s: hardware max sg size (%u) is " 866 "less than the driver minimum, unable to use " 867 "the hardware device\n", 868 __func__, devdata->max_sync_sg); 869 ret = -EINVAL; 870 goto out; 871 } 872 873 
out: 874 return ret; 875 } 876 877 /** 878 * 879 * nx842_OF_upd -- Handle OF properties updates for the device. 880 * 881 * Set all properties from the OF tree. Optionally, a new property 882 * can be provided by the @new_prop pointer to overwrite an existing value. 883 * The device will remain disabled until all values are valid, this function 884 * will return an error for updates unless all values are valid. 885 * 886 * @new_prop: If not NULL, this property is being updated. If NULL, update 887 * all properties from the current values in the OF tree. 888 * 889 * Returns: 890 * 0 - Success 891 * -ENOMEM - Could not allocate memory for new devdata structure 892 * -EINVAL - property value not found, new_prop is not a recognized 893 * property for the device or property value is not valid. 894 * -ENODEV - Device is not available 895 */ 896 static int nx842_OF_upd(struct property *new_prop) 897 { 898 struct nx842_devdata *old_devdata = NULL; 899 struct nx842_devdata *new_devdata = NULL; 900 struct device_node *of_node = NULL; 901 struct property *status = NULL; 902 struct property *maxsglen = NULL; 903 struct property *maxsyncop = NULL; 904 int ret = 0; 905 unsigned long flags; 906 907 spin_lock_irqsave(&devdata_mutex, flags); 908 old_devdata = rcu_dereference_check(devdata, 909 lockdep_is_held(&devdata_mutex)); 910 if (old_devdata) 911 of_node = old_devdata->dev->of_node; 912 913 if (!old_devdata || !of_node) { 914 pr_err("%s: device is not available\n", __func__); 915 spin_unlock_irqrestore(&devdata_mutex, flags); 916 return -ENODEV; 917 } 918 919 new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); 920 if (!new_devdata) { 921 dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__); 922 ret = -ENOMEM; 923 goto error_out; 924 } 925 926 memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); 927 new_devdata->counters = old_devdata->counters; 928 929 /* Set ptrs for existing properties */ 930 status = of_find_property(of_node, 
"status", NULL); 931 maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); 932 maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); 933 if (!status || !maxsglen || !maxsyncop) { 934 dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); 935 ret = -EINVAL; 936 goto error_out; 937 } 938 939 /* 940 * If this is a property update, there are only certain properties that 941 * we care about. Bail if it isn't in the below list 942 */ 943 if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || 944 strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || 945 strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) 946 goto out; 947 948 /* Perform property updates */ 949 ret = nx842_OF_upd_status(new_devdata, status); 950 if (ret) 951 goto error_out; 952 953 ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); 954 if (ret) 955 goto error_out; 956 957 ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); 958 if (ret) 959 goto error_out; 960 961 out: 962 dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", 963 __func__, new_devdata->max_sync_size, 964 old_devdata->max_sync_size); 965 dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", 966 __func__, new_devdata->max_sync_sg, 967 old_devdata->max_sync_sg); 968 dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", 969 __func__, new_devdata->max_sg_len, 970 old_devdata->max_sg_len); 971 972 rcu_assign_pointer(devdata, new_devdata); 973 spin_unlock_irqrestore(&devdata_mutex, flags); 974 synchronize_rcu(); 975 dev_set_drvdata(new_devdata->dev, new_devdata); 976 kfree(old_devdata); 977 return 0; 978 979 error_out: 980 if (new_devdata) { 981 dev_info(old_devdata->dev, "%s: device disabled\n", __func__); 982 nx842_OF_set_defaults(new_devdata); 983 rcu_assign_pointer(devdata, new_devdata); 984 spin_unlock_irqrestore(&devdata_mutex, flags); 985 synchronize_rcu(); 986 dev_set_drvdata(new_devdata->dev, new_devdata); 987 kfree(old_devdata); 
988 } else { 989 dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); 990 spin_unlock_irqrestore(&devdata_mutex, flags); 991 } 992 993 if (!ret) 994 ret = -EINVAL; 995 return ret; 996 } 997 998 /** 999 * nx842_OF_notifier - Process updates to OF properties for the device 1000 * 1001 * @np: notifier block 1002 * @action: notifier action 1003 * @update: struct pSeries_reconfig_prop_update pointer if action is 1004 * PSERIES_UPDATE_PROPERTY 1005 * 1006 * Returns: 1007 * NOTIFY_OK on success 1008 * NOTIFY_BAD encoded with error number on failure, use 1009 * notifier_to_errno() to decode this value 1010 */ 1011 static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, 1012 void *update) 1013 { 1014 struct of_prop_reconfig *upd = update; 1015 struct nx842_devdata *local_devdata; 1016 struct device_node *node = NULL; 1017 1018 rcu_read_lock(); 1019 local_devdata = rcu_dereference(devdata); 1020 if (local_devdata) 1021 node = local_devdata->dev->of_node; 1022 1023 if (local_devdata && 1024 action == OF_RECONFIG_UPDATE_PROPERTY && 1025 !strcmp(upd->dn->name, node->name)) { 1026 rcu_read_unlock(); 1027 nx842_OF_upd(upd->prop); 1028 } else 1029 rcu_read_unlock(); 1030 1031 return NOTIFY_OK; 1032 } 1033 1034 static struct notifier_block nx842_of_nb = { 1035 .notifier_call = nx842_OF_notifier, 1036 }; 1037 1038 #define nx842_counter_read(_name) \ 1039 static ssize_t nx842_##_name##_show(struct device *dev, \ 1040 struct device_attribute *attr, \ 1041 char *buf) { \ 1042 struct nx842_devdata *local_devdata; \ 1043 int p = 0; \ 1044 rcu_read_lock(); \ 1045 local_devdata = rcu_dereference(devdata); \ 1046 if (local_devdata) \ 1047 p = snprintf(buf, PAGE_SIZE, "%ld\n", \ 1048 atomic64_read(&local_devdata->counters->_name)); \ 1049 rcu_read_unlock(); \ 1050 return p; \ 1051 } 1052 1053 #define NX842DEV_COUNTER_ATTR_RO(_name) \ 1054 nx842_counter_read(_name); \ 1055 static struct device_attribute dev_attr_##_name = __ATTR(_name, \ 
1056 0444, \ 1057 nx842_##_name##_show,\ 1058 NULL); 1059 1060 NX842DEV_COUNTER_ATTR_RO(comp_complete); 1061 NX842DEV_COUNTER_ATTR_RO(comp_failed); 1062 NX842DEV_COUNTER_ATTR_RO(decomp_complete); 1063 NX842DEV_COUNTER_ATTR_RO(decomp_failed); 1064 NX842DEV_COUNTER_ATTR_RO(swdecomp); 1065 1066 static ssize_t nx842_timehist_show(struct device *, 1067 struct device_attribute *, char *); 1068 1069 static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, 1070 nx842_timehist_show, NULL); 1071 static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times, 1072 0444, nx842_timehist_show, NULL); 1073 1074 static ssize_t nx842_timehist_show(struct device *dev, 1075 struct device_attribute *attr, char *buf) { 1076 char *p = buf; 1077 struct nx842_devdata *local_devdata; 1078 atomic64_t *times; 1079 int bytes_remain = PAGE_SIZE; 1080 int bytes; 1081 int i; 1082 1083 rcu_read_lock(); 1084 local_devdata = rcu_dereference(devdata); 1085 if (!local_devdata) { 1086 rcu_read_unlock(); 1087 return 0; 1088 } 1089 1090 if (attr == &dev_attr_comp_times) 1091 times = local_devdata->counters->comp_times; 1092 else if (attr == &dev_attr_decomp_times) 1093 times = local_devdata->counters->decomp_times; 1094 else { 1095 rcu_read_unlock(); 1096 return 0; 1097 } 1098 1099 for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { 1100 bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n", 1101 i ? 
(2<<(i-1)) : 0, (2<<i)-1, 1102 atomic64_read(×[i])); 1103 bytes_remain -= bytes; 1104 p += bytes; 1105 } 1106 /* The last bucket holds everything over 1107 * 2<<(NX842_HIST_SLOTS - 2) us */ 1108 bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n", 1109 2<<(NX842_HIST_SLOTS - 2), 1110 atomic64_read(×[(NX842_HIST_SLOTS - 1)])); 1111 p += bytes; 1112 1113 rcu_read_unlock(); 1114 return p - buf; 1115 } 1116 1117 static struct attribute *nx842_sysfs_entries[] = { 1118 &dev_attr_comp_complete.attr, 1119 &dev_attr_comp_failed.attr, 1120 &dev_attr_decomp_complete.attr, 1121 &dev_attr_decomp_failed.attr, 1122 &dev_attr_swdecomp.attr, 1123 &dev_attr_comp_times.attr, 1124 &dev_attr_decomp_times.attr, 1125 NULL, 1126 }; 1127 1128 static struct attribute_group nx842_attribute_group = { 1129 .name = NULL, /* put in device directory */ 1130 .attrs = nx842_sysfs_entries, 1131 }; 1132 1133 static int __init nx842_probe(struct vio_dev *viodev, 1134 const struct vio_device_id *id) 1135 { 1136 struct nx842_devdata *old_devdata, *new_devdata = NULL; 1137 unsigned long flags; 1138 int ret = 0; 1139 1140 spin_lock_irqsave(&devdata_mutex, flags); 1141 old_devdata = rcu_dereference_check(devdata, 1142 lockdep_is_held(&devdata_mutex)); 1143 1144 if (old_devdata && old_devdata->vdev != NULL) { 1145 dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); 1146 ret = -1; 1147 goto error_unlock; 1148 } 1149 1150 dev_set_drvdata(&viodev->dev, NULL); 1151 1152 new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); 1153 if (!new_devdata) { 1154 dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__); 1155 ret = -ENOMEM; 1156 goto error_unlock; 1157 } 1158 1159 new_devdata->counters = kzalloc(sizeof(*new_devdata->counters), 1160 GFP_NOFS); 1161 if (!new_devdata->counters) { 1162 dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__); 1163 ret = -ENOMEM; 1164 goto error_unlock; 1165 } 1166 
1167 new_devdata->vdev = viodev; 1168 new_devdata->dev = &viodev->dev; 1169 nx842_OF_set_defaults(new_devdata); 1170 1171 rcu_assign_pointer(devdata, new_devdata); 1172 spin_unlock_irqrestore(&devdata_mutex, flags); 1173 synchronize_rcu(); 1174 kfree(old_devdata); 1175 1176 of_reconfig_notifier_register(&nx842_of_nb); 1177 1178 ret = nx842_OF_upd(NULL); 1179 if (ret && ret != -ENODEV) { 1180 dev_err(&viodev->dev, "could not parse device tree. %d\n", ret); 1181 ret = -1; 1182 goto error; 1183 } 1184 1185 rcu_read_lock(); 1186 dev_set_drvdata(&viodev->dev, rcu_dereference(devdata)); 1187 rcu_read_unlock(); 1188 1189 if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { 1190 dev_err(&viodev->dev, "could not create sysfs device attributes\n"); 1191 ret = -1; 1192 goto error; 1193 } 1194 1195 return 0; 1196 1197 error_unlock: 1198 spin_unlock_irqrestore(&devdata_mutex, flags); 1199 if (new_devdata) 1200 kfree(new_devdata->counters); 1201 kfree(new_devdata); 1202 error: 1203 return ret; 1204 } 1205 1206 static int __exit nx842_remove(struct vio_dev *viodev) 1207 { 1208 struct nx842_devdata *old_devdata; 1209 unsigned long flags; 1210 1211 pr_info("Removing IBM Power 842 compression device\n"); 1212 sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); 1213 1214 spin_lock_irqsave(&devdata_mutex, flags); 1215 old_devdata = rcu_dereference_check(devdata, 1216 lockdep_is_held(&devdata_mutex)); 1217 of_reconfig_notifier_unregister(&nx842_of_nb); 1218 RCU_INIT_POINTER(devdata, NULL); 1219 spin_unlock_irqrestore(&devdata_mutex, flags); 1220 synchronize_rcu(); 1221 dev_set_drvdata(&viodev->dev, NULL); 1222 if (old_devdata) 1223 kfree(old_devdata->counters); 1224 kfree(old_devdata); 1225 return 0; 1226 } 1227 1228 static struct vio_device_id nx842_driver_ids[] = { 1229 {"ibm,compression-v1", "ibm,compression"}, 1230 {"", ""}, 1231 }; 1232 1233 static struct vio_driver nx842_driver = { 1234 .name = MODULE_NAME, 1235 .probe = nx842_probe, 1236 .remove = 
__exit_p(nx842_remove), 1237 .get_desired_dma = nx842_get_desired_dma, 1238 .id_table = nx842_driver_ids, 1239 }; 1240 1241 static int __init nx842_init(void) 1242 { 1243 struct nx842_devdata *new_devdata; 1244 pr_info("Registering IBM Power 842 compression driver\n"); 1245 1246 RCU_INIT_POINTER(devdata, NULL); 1247 new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); 1248 if (!new_devdata) { 1249 pr_err("Could not allocate memory for device data\n"); 1250 return -ENOMEM; 1251 } 1252 new_devdata->status = UNAVAILABLE; 1253 RCU_INIT_POINTER(devdata, new_devdata); 1254 1255 return vio_register_driver(&nx842_driver); 1256 } 1257 1258 module_init(nx842_init); 1259 1260 static void __exit nx842_exit(void) 1261 { 1262 struct nx842_devdata *old_devdata; 1263 unsigned long flags; 1264 1265 pr_info("Exiting IBM Power 842 compression driver\n"); 1266 spin_lock_irqsave(&devdata_mutex, flags); 1267 old_devdata = rcu_dereference_check(devdata, 1268 lockdep_is_held(&devdata_mutex)); 1269 RCU_INIT_POINTER(devdata, NULL); 1270 spin_unlock_irqrestore(&devdata_mutex, flags); 1271 synchronize_rcu(); 1272 if (old_devdata) 1273 dev_set_drvdata(old_devdata->dev, NULL); 1274 kfree(old_devdata); 1275 vio_unregister_driver(&nx842_driver); 1276 } 1277 1278 module_exit(nx842_exit); 1279 1280 /********************************* 1281 * 842 software decompressor 1282 *********************************/ 1283 typedef int (*sw842_template_op)(const char **, int *, unsigned char **, 1284 struct sw842_fifo *); 1285 1286 static int sw842_data8(const char **, int *, unsigned char **, 1287 struct sw842_fifo *); 1288 static int sw842_data4(const char **, int *, unsigned char **, 1289 struct sw842_fifo *); 1290 static int sw842_data2(const char **, int *, unsigned char **, 1291 struct sw842_fifo *); 1292 static int sw842_ptr8(const char **, int *, unsigned char **, 1293 struct sw842_fifo *); 1294 static int sw842_ptr4(const char **, int *, unsigned char **, 1295 struct sw842_fifo *); 1296 static int 
sw842_ptr2(const char **, int *, unsigned char **, 1297 struct sw842_fifo *); 1298 1299 /* special templates */ 1300 #define SW842_TMPL_REPEAT 0x1B 1301 #define SW842_TMPL_ZEROS 0x1C 1302 #define SW842_TMPL_EOF 0x1E 1303 1304 static sw842_template_op sw842_tmpl_ops[26][4] = { 1305 { sw842_data8, NULL}, /* 0 (00000) */ 1306 { sw842_data4, sw842_data2, sw842_ptr2, NULL}, 1307 { sw842_data4, sw842_ptr2, sw842_data2, NULL}, 1308 { sw842_data4, sw842_ptr2, sw842_ptr2, NULL}, 1309 { sw842_data4, sw842_ptr4, NULL}, 1310 { sw842_data2, sw842_ptr2, sw842_data4, NULL}, 1311 { sw842_data2, sw842_ptr2, sw842_data2, sw842_ptr2}, 1312 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_data2}, 1313 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_ptr2,}, 1314 { sw842_data2, sw842_ptr2, sw842_ptr4, NULL}, 1315 { sw842_ptr2, sw842_data2, sw842_data4, NULL}, /* 10 (01010) */ 1316 { sw842_ptr2, sw842_data4, sw842_ptr2, NULL}, 1317 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_data2}, 1318 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_ptr2}, 1319 { sw842_ptr2, sw842_data2, sw842_ptr4, NULL}, 1320 { sw842_ptr2, sw842_ptr2, sw842_data4, NULL}, 1321 { sw842_ptr2, sw842_ptr2, sw842_data2, sw842_ptr2}, 1322 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_data2}, 1323 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_ptr2}, 1324 { sw842_ptr2, sw842_ptr2, sw842_ptr4, NULL}, 1325 { sw842_ptr4, sw842_data4, NULL}, /* 20 (10100) */ 1326 { sw842_ptr4, sw842_data2, sw842_ptr2, NULL}, 1327 { sw842_ptr4, sw842_ptr2, sw842_data2, NULL}, 1328 { sw842_ptr4, sw842_ptr2, sw842_ptr2, NULL}, 1329 { sw842_ptr4, sw842_ptr4, NULL}, 1330 { sw842_ptr8, NULL} 1331 }; 1332 1333 /* Software decompress helpers */ 1334 1335 static uint8_t sw842_get_byte(const char *buf, int bit) 1336 { 1337 uint8_t tmpl; 1338 uint16_t tmp; 1339 tmp = htons(*(uint16_t *)(buf)); 1340 tmp = (uint16_t)(tmp << bit); 1341 tmp = ntohs(tmp); 1342 memcpy(&tmpl, &tmp, 1); 1343 return tmpl; 1344 } 1345 1346 static uint8_t sw842_get_template(const char **buf, int 
*bit) 1347 { 1348 uint8_t byte; 1349 byte = sw842_get_byte(*buf, *bit); 1350 byte = byte >> 3; 1351 byte &= 0x1F; 1352 *buf += (*bit + 5) / 8; 1353 *bit = (*bit + 5) % 8; 1354 return byte; 1355 } 1356 1357 /* repeat_count happens to be 5-bit too (like the template) */ 1358 static uint8_t sw842_get_repeat_count(const char **buf, int *bit) 1359 { 1360 uint8_t byte; 1361 byte = sw842_get_byte(*buf, *bit); 1362 byte = byte >> 2; 1363 byte &= 0x3F; 1364 *buf += (*bit + 6) / 8; 1365 *bit = (*bit + 6) % 8; 1366 return byte; 1367 } 1368 1369 static uint8_t sw842_get_ptr2(const char **buf, int *bit) 1370 { 1371 uint8_t ptr; 1372 ptr = sw842_get_byte(*buf, *bit); 1373 (*buf)++; 1374 return ptr; 1375 } 1376 1377 static uint16_t sw842_get_ptr4(const char **buf, int *bit, 1378 struct sw842_fifo *fifo) 1379 { 1380 uint16_t ptr; 1381 ptr = htons(*(uint16_t *)(*buf)); 1382 ptr = (uint16_t)(ptr << *bit); 1383 ptr = ptr >> 7; 1384 ptr &= 0x01FF; 1385 *buf += (*bit + 9) / 8; 1386 *bit = (*bit + 9) % 8; 1387 return ptr; 1388 } 1389 1390 static uint8_t sw842_get_ptr8(const char **buf, int *bit, 1391 struct sw842_fifo *fifo) 1392 { 1393 return sw842_get_ptr2(buf, bit); 1394 } 1395 1396 /* Software decompress template ops */ 1397 1398 static int sw842_data8(const char **inbuf, int *inbit, 1399 unsigned char **outbuf, struct sw842_fifo *fifo) 1400 { 1401 int ret; 1402 1403 ret = sw842_data4(inbuf, inbit, outbuf, fifo); 1404 if (ret) 1405 return ret; 1406 ret = sw842_data4(inbuf, inbit, outbuf, fifo); 1407 return ret; 1408 } 1409 1410 static int sw842_data4(const char **inbuf, int *inbit, 1411 unsigned char **outbuf, struct sw842_fifo *fifo) 1412 { 1413 int ret; 1414 1415 ret = sw842_data2(inbuf, inbit, outbuf, fifo); 1416 if (ret) 1417 return ret; 1418 ret = sw842_data2(inbuf, inbit, outbuf, fifo); 1419 return ret; 1420 } 1421 1422 static int sw842_data2(const char **inbuf, int *inbit, 1423 unsigned char **outbuf, struct sw842_fifo *fifo) 1424 { 1425 **outbuf = sw842_get_byte(*inbuf, 
*inbit);
	(*inbuf)++;
	(*outbuf)++;
	/* second literal byte, read at the same bit offset */
	**outbuf = sw842_get_byte(*inbuf, *inbit);
	(*inbuf)++;
	(*outbuf)++;
	return 0;
}

/*
 * Copy the 8-byte fifo entry selected by the next index in the input to the
 * output and advance the output by 8 bytes.
 * Returns 1 when the f8/f4 fifo is not yet full and the index is not below
 * f8_count, 0 otherwise.
 */
static int sw842_ptr8(const char **inbuf, int *inbit,
		unsigned char **outbuf, struct sw842_fifo *fifo)
{
	uint8_t ptr;
	ptr = sw842_get_ptr8(inbuf, inbit, fifo);
	if (!fifo->f84_full && (ptr >= fifo->f8_count))
		return 1;
	memcpy(*outbuf, fifo->f8[ptr], 8);
	*outbuf += 8;
	return 0;
}

/*
 * Copy the 4-byte fifo entry selected by the next index in the input to the
 * output and advance the output by 4 bytes.
 * Returns 1 when the f8/f4 fifo is not yet full and the index is not below
 * f4_count, 0 otherwise.
 */
static int sw842_ptr4(const char **inbuf, int *inbit,
		unsigned char **outbuf, struct sw842_fifo *fifo)
{
	uint16_t ptr;
	ptr = sw842_get_ptr4(inbuf, inbit, fifo);
	if (!fifo->f84_full && (ptr >= fifo->f4_count))
		return 1;
	memcpy(*outbuf, fifo->f4[ptr], 4);
	*outbuf += 4;
	return 0;
}

/*
 * Copy the 2-byte fifo entry selected by the next index in the input to the
 * output and advance the output by 2 bytes.
 * Returns 1 when the f2 fifo is not yet full and the index is not below
 * f2_count, 0 otherwise.
 */
static int sw842_ptr2(const char **inbuf, int *inbit,
		unsigned char **outbuf, struct sw842_fifo *fifo)
{
	uint8_t ptr;
	ptr = sw842_get_ptr2(inbuf, inbit);
	if (!fifo->f2_full && (ptr >= fifo->f2_count))
		return 1;
	memcpy(*outbuf, fifo->f2[ptr], 2);
	*outbuf += 2;
	return 0;
}

/*
 * Record the 8 bytes at @buf in the fifos: once as an f8 entry and as four
 * consecutive f2 entries.  f4 is never written here although f4_count is
 * advanced by two - presumably f4 shares storage with f8; verify against
 * the definition of struct sw842_fifo.
 */
static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo)
{
	unsigned char initial_f2count = fifo->f2_count;

	memcpy(fifo->f8[fifo->f8_count], buf, 8);
	fifo->f4_count += 2;
	fifo->f8_count += 1;

	/*
	 * NOTE(review): the count is divided by 512, not reduced modulo 512,
	 * when the fifo becomes full - confirm this is intended.
	 */
	if (!fifo->f84_full && fifo->f4_count >= 512) {
		fifo->f84_full = 1;
		fifo->f4_count /= 512;
	}

	memcpy(fifo->f2[fifo->f2_count++], buf, 2);
	memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2);
	memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2);
	memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2);
	/* a count lower than before means the index wrapped around */
	if (fifo->f2_count < initial_f2count)
		fifo->f2_full = 1;
}

/*
 * sw842_decompress - software decompression of one 842 stream
 *
 * @src:	compressed input
 * @srclen:	number of input bytes
 * @dst:	output buffer
 * @destlen:	in: capacity of @dst; out: number of bytes written on
 *		success, 0 on failure
 * @wrkmem:	working memory, used as a struct nx842_workmem whose swfifo
 *		member is cleared and used as the fifo state
 *
 * Templates are read until SW842_TMPL_EOF is seen.  Each REPEAT iteration,
 * each ZEROS template and each regular template is required to have 8 bytes
 * of output space left.
 *
 * Returns 0 on success, -EINVAL when the input is truncated or malformed
 * (unknown template, REPEAT without a preceding group, rejected fifo
 * index) and -ENOSPC when the output buffer is too small.
 */
static int sw842_decompress(const unsigned char *src, int srclen,
			unsigned char *dst, int *destlen,
			const void *wrkmem)
{
	uint8_t tmpl;
	const char *inbuf;
	int inbit = 0;
	unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf;
	const char *inbuf_end;
	sw842_template_op op;
	int opindex;
	int i, repeat_count;
	struct sw842_fifo *fifo;
	int ret = 0;

	fifo = &((struct nx842_workmem *)(wrkmem))->swfifo;
	memset(fifo, 0, sizeof(*fifo));

	origbuf = NULL;
	inbuf = src;
	inbuf_end = src + srclen;
	outbuf = dst;
	outbuf_end = dst + *destlen;

	/*
	 * NOTE(review): the template is fetched before the cursor is checked
	 * against inbuf_end - confirm the helper cannot read past the input.
	 */
	while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) {
		if (inbuf >= inbuf_end) {
			ret = -EINVAL;
			goto out;
		}

		opindex = 0;
		/* prevbuf: start of the previous group, origbuf: this one */
		prevbuf = origbuf;
		origbuf = outbuf;
		switch (tmpl) {
		case SW842_TMPL_REPEAT:
			/* nothing has been produced yet, nothing to repeat */
			if (prevbuf == NULL) {
				ret = -EINVAL;
				goto out;
			}

			repeat_count = sw842_get_repeat_count(&inbuf,
								&inbit) + 1;

			/* Did the repeat count advance past the end of input */
			if (inbuf > inbuf_end) {
				ret = -EINVAL;
				goto out;
			}

			for (i = 0; i < repeat_count; i++) {
				/* Would this overflow the output buffer */
				if ((outbuf + 8) > outbuf_end) {
					ret = -ENOSPC;
					goto out;
				}

				memcpy(outbuf, prevbuf, 8);
				sw842_copy_to_fifo(outbuf, fifo);
				outbuf += 8;
			}
			break;

		case SW842_TMPL_ZEROS:
			/* Would this overflow the output buffer */
			if ((outbuf + 8) > outbuf_end) {
				ret = -ENOSPC;
				goto out;
			}

			memset(outbuf, 0, 8);
			sw842_copy_to_fifo(outbuf, fifo);
			outbuf += 8;
			break;

		default:
			/* only templates 0..25 have an entry in the op table */
			if (tmpl > 25) {
				ret = -EINVAL;
				goto out;
			}

			/* Does this go past the end of the input buffer */
			if ((inbuf + 2) > inbuf_end) {
				ret = -EINVAL;
				goto out;
			}

			/* Would this overflow the output buffer */
			if ((outbuf + 8) > outbuf_end) {
				ret = -ENOSPC;
				goto out;
			}

			/* run the ops of this template until NULL or 4 ops */
			while (opindex < 4 &&
				(op = sw842_tmpl_ops[tmpl][opindex++])
					!= NULL) {
				ret = (*op)(&inbuf, &inbit, &outbuf, fifo);
				if (ret) {
					ret = -EINVAL;
					goto out;
				}
				/*
				 * NOTE(review): the group is pushed to the
				 * fifo after every op, not once per template
				 * - confirm this is intended.
				 */
				sw842_copy_to_fifo(origbuf, fifo);
			}
		}
	}

out:
	if (!ret)
		*destlen = (unsigned int)(outbuf - dst);
	else
		*destlen = 0;

	return ret;
}