1 /* 2 * Driver for IBM Power 842 compression accelerator 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 * 18 * Copyright (C) IBM Corporation, 2012 19 * 20 * Authors: Robert Jennings <rcj@linux.vnet.ibm.com> 21 * Seth Jennings <sjenning@linux.vnet.ibm.com> 22 */ 23 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/nx842.h> 27 #include <linux/of.h> 28 #include <linux/slab.h> 29 30 #include <asm/page.h> 31 #include <asm/vio.h> 32 33 #include "nx_csbcpb.h" /* struct nx_csbcpb */ 34 35 #define MODULE_NAME "nx-compress" 36 MODULE_LICENSE("GPL"); 37 MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>"); 38 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); 39 40 #define SHIFT_4K 12 41 #define SHIFT_64K 16 42 #define SIZE_4K (1UL << SHIFT_4K) 43 #define SIZE_64K (1UL << SHIFT_64K) 44 45 /* IO buffer must be 128 byte aligned */ 46 #define IO_BUFFER_ALIGN 128 47 48 struct nx842_header { 49 int blocks_nr; /* number of compressed blocks */ 50 int offset; /* offset of the first block (from beginning of header) */ 51 int sizes[0]; /* size of compressed blocks */ 52 }; 53 54 static inline int nx842_header_size(const struct nx842_header *hdr) 55 { 56 return sizeof(struct nx842_header) + 57 hdr->blocks_nr * sizeof(hdr->sizes[0]); 58 } 59 60 /* Macros for fields within nx_csbcpb */ 61 /* Check the valid bit within the csbcpb valid field */ 62 #define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) 63 64 /* CE macros operate on the completion_extension field bits in the csbcpb. 65 * CE0 0=full completion, 1=partial completion 66 * CE1 0=CE0 indicates completion, 1=termination (output may be modified) 67 * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ 68 #define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) 69 #define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) 70 #define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) 71 72 /* The NX unit accepts data only on 4K page boundaries */ 73 #define NX842_HW_PAGE_SHIFT SHIFT_4K 74 #define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT) 75 #define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) 76 77 enum nx842_status { 78 UNAVAILABLE, 79 AVAILABLE 80 }; 81 82 struct ibm_nx842_counters { 83 atomic64_t comp_complete; 84 atomic64_t comp_failed; 85 atomic64_t decomp_complete; 86 atomic64_t decomp_failed; 87 atomic64_t swdecomp; 88 atomic64_t comp_times[32]; 89 atomic64_t decomp_times[32]; 90 }; 91 92 static struct nx842_devdata { 93 struct vio_dev *vdev; 94 struct device *dev; 95 struct ibm_nx842_counters *counters; 96 unsigned int max_sg_len; 97 unsigned int max_sync_size; 98 unsigned int max_sync_sg; 99 enum nx842_status status; 100 } __rcu *devdata; 101 static DEFINE_SPINLOCK(devdata_mutex); 102 103 #define NX842_COUNTER_INC(_x) \ 104 static inline void nx842_inc_##_x( \ 105 const struct nx842_devdata *dev) { \ 106 if (dev) \ 107 atomic64_inc(&dev->counters->_x); \ 108 } 109 NX842_COUNTER_INC(comp_complete); 110 NX842_COUNTER_INC(comp_failed); 111 NX842_COUNTER_INC(decomp_complete); 112 NX842_COUNTER_INC(decomp_failed); 113 NX842_COUNTER_INC(swdecomp); 114 115 #define NX842_HIST_SLOTS 16 116 117 static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) 118 { 119 int bucket = fls(time); 120 121 if (bucket) 122 bucket = min((NX842_HIST_SLOTS - 1), bucket - 1); 123 124 atomic64_inc(×[bucket]); 125 } 126 127 /* NX unit operation flags */ 128 #define NX842_OP_COMPRESS 0x0 129 #define NX842_OP_CRC 0x1 130 #define NX842_OP_DECOMPRESS 0x2 131 #define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) 132 #define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) 133 #define NX842_OP_ASYNC (1<<23) 134 #define NX842_OP_NOTIFY (1<<22) 135 #define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) 136 137 static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) 138 { 139 /* No use of DMA mappings within the driver. */ 140 return 0; 141 } 142 143 struct nx842_slentry { 144 unsigned long ptr; /* Real address (use __pa()) */ 145 unsigned long len; 146 }; 147 148 /* pHyp scatterlist entry */ 149 struct nx842_scatterlist { 150 int entry_nr; /* number of slentries */ 151 struct nx842_slentry *entries; /* ptr to array of slentries */ 152 }; 153 154 /* Does not include sizeof(entry_nr) in the size */ 155 static inline unsigned long nx842_get_scatterlist_size( 156 struct nx842_scatterlist *sl) 157 { 158 return sl->entry_nr * sizeof(struct nx842_slentry); 159 } 160 161 static inline unsigned long nx842_get_pa(void *addr) 162 { 163 if (is_vmalloc_addr(addr)) 164 return page_to_phys(vmalloc_to_page(addr)) 165 + offset_in_page(addr); 166 else 167 return __pa(addr); 168 } 169 170 static int nx842_build_scatterlist(unsigned long buf, int len, 171 struct nx842_scatterlist *sl) 172 { 173 unsigned long nextpage; 174 struct nx842_slentry *entry; 175 176 sl->entry_nr = 0; 177 178 entry = sl->entries; 179 while (len) { 180 entry->ptr = nx842_get_pa((void *)buf); 181 nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE); 182 if (nextpage < buf + len) { 183 /* we aren't at the end yet */ 184 if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE)) 185 /* we are in the middle (or beginning) */ 186 entry->len = NX842_HW_PAGE_SIZE; 187 else 188 /* we are at the beginning */ 189 entry->len = nextpage - buf; 190 } else { 191 /* at the end */ 192 entry->len = len; 193 } 194 195 len -= entry->len; 196 buf += entry->len; 197 sl->entry_nr++; 198 entry++; 199 } 200 201 return 0; 202 } 203 204 /* 205 * Working memory for software decompression 206 */ 207 struct sw842_fifo { 208 union { 209 char f8[256][8]; 210 char f4[512][4]; 211 }; 212 char f2[256][2]; 213 unsigned char f84_full; 214 unsigned char f2_full; 215 unsigned char f8_count; 216 unsigned char f2_count; 217 unsigned int f4_count; 218 }; 219 220 /* 221 * Working memory for crypto API 222 */ 223 struct nx842_workmem { 224 char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */ 225 union { 226 /* hardware working memory */ 227 struct { 228 /* scatterlist */ 229 char slin[SIZE_4K]; 230 char slout[SIZE_4K]; 231 /* coprocessor status/parameter block */ 232 struct nx_csbcpb csbcpb; 233 }; 234 /* software working memory */ 235 struct sw842_fifo swfifo; /* software decompression fifo */ 236 }; 237 }; 238 239 int nx842_get_workmem_size(void) 240 { 241 return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE; 242 } 243 EXPORT_SYMBOL_GPL(nx842_get_workmem_size); 244 245 int nx842_get_workmem_size_aligned(void) 246 { 247 return sizeof(struct nx842_workmem); 248 } 249 EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned); 250 251 static int nx842_validate_result(struct device *dev, 252 struct cop_status_block *csb) 253 { 254 /* The csb must be valid after returning from vio_h_cop_sync */ 255 if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { 256 dev_err(dev, "%s: cspcbp not valid upon completion.\n", 257 __func__); 258 dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", 259 csb->valid, 260 csb->crb_seq_number, 261 csb->completion_code, 262 csb->completion_extension); 263 dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", 264 csb->processed_byte_count, 265 (unsigned long)csb->address); 266 return -EIO; 267 } 268 269 /* Check return values from the hardware in the CSB */ 270 switch (csb->completion_code) { 271 case 0: /* Completed without error */ 272 break; 273 case 64: /* Target bytes > Source bytes during compression */ 274 case 13: /* Output buffer too small */ 275 dev_dbg(dev, "%s: Compression output larger than input\n", 276 __func__); 277 return -ENOSPC; 278 case 66: /* Input data contains an illegal template field */ 279 case 67: /* Template indicates data past the end of the input stream */ 280 dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", 281 __func__, csb->completion_code); 282 return -EINVAL; 283 default: 284 dev_dbg(dev, "%s: Unspecified error (code:%d)\n", 285 __func__, csb->completion_code); 286 return -EIO; 287 } 288 289 /* Hardware sanity check */ 290 if (!NX842_CSBCPB_CE2(csb->completion_extension)) { 291 dev_err(dev, "%s: No error returned by hardware, but " 292 "data returned is unusable, contact support.\n" 293 "(Additional info: csbcbp->processed bytes " 294 "does not specify processed bytes for the " 295 "target buffer.)\n", __func__); 296 return -EIO; 297 } 298 299 return 0; 300 } 301 302 /** 303 * nx842_compress - Compress data using the 842 algorithm 304 * 305 * Compression provide by the NX842 coprocessor on IBM Power systems. 306 * The input buffer is compressed and the result is stored in the 307 * provided output buffer. 308 * 309 * Upon return from this function @outlen contains the length of the 310 * compressed data. If there is an error then @outlen will be 0 and an 311 * error will be specified by the return code from this function. 312 * 313 * @in: Pointer to input buffer, must be page aligned 314 * @inlen: Length of input buffer, must be PAGE_SIZE 315 * @out: Pointer to output buffer 316 * @outlen: Length of output buffer 317 * @wrkmem: ptr to buffer for working memory, size determined by 318 * nx842_get_workmem_size() 319 * 320 * Returns: 321 * 0 Success, output of length @outlen stored in the buffer at @out 322 * -ENOMEM Unable to allocate internal buffers 323 * -ENOSPC Output buffer is to small 324 * -EMSGSIZE XXX Difficult to describe this limitation 325 * -EIO Internal error 326 * -ENODEV Hardware unavailable 327 */ 328 int nx842_compress(const unsigned char *in, unsigned int inlen, 329 unsigned char *out, unsigned int *outlen, void *wmem) 330 { 331 struct nx842_header *hdr; 332 struct nx842_devdata *local_devdata; 333 struct device *dev = NULL; 334 struct nx842_workmem *workmem; 335 struct nx842_scatterlist slin, slout; 336 struct nx_csbcpb *csbcpb; 337 int ret = 0, max_sync_size, i, bytesleft, size, hdrsize; 338 unsigned long inbuf, outbuf, padding; 339 struct vio_pfo_op op = { 340 .done = NULL, 341 .handle = 0, 342 .timeout = 0, 343 }; 344 unsigned long start_time = get_tb(); 345 346 /* 347 * Make sure input buffer is 64k page aligned. This is assumed since 348 * this driver is designed for page compression only (for now). This 349 * is very nice since we can now use direct DDE(s) for the input and 350 * the alignment is guaranteed. 351 */ 352 inbuf = (unsigned long)in; 353 if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE) 354 return -EINVAL; 355 356 rcu_read_lock(); 357 local_devdata = rcu_dereference(devdata); 358 if (!local_devdata || !local_devdata->dev) { 359 rcu_read_unlock(); 360 return -ENODEV; 361 } 362 max_sync_size = local_devdata->max_sync_size; 363 dev = local_devdata->dev; 364 365 /* Create the header */ 366 hdr = (struct nx842_header *)out; 367 hdr->blocks_nr = PAGE_SIZE / max_sync_size; 368 hdrsize = nx842_header_size(hdr); 369 outbuf = (unsigned long)out + hdrsize; 370 bytesleft = *outlen - hdrsize; 371 372 /* Init scatterlist */ 373 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, 374 NX842_HW_PAGE_SIZE); 375 slin.entries = (struct nx842_slentry *)workmem->slin; 376 slout.entries = (struct nx842_slentry *)workmem->slout; 377 378 /* Init operation */ 379 op.flags = NX842_OP_COMPRESS; 380 csbcpb = &workmem->csbcpb; 381 memset(csbcpb, 0, sizeof(*csbcpb)); 382 op.csbcpb = nx842_get_pa(csbcpb); 383 op.out = nx842_get_pa(slout.entries); 384 385 for (i = 0; i < hdr->blocks_nr; i++) { 386 /* 387 * Aligning the output blocks to 128 bytes does waste space, 388 * but it prevents the need for bounce buffers and memory 389 * copies. It also simplifies the code a lot. In the worst 390 * case (64k page, 4k max_sync_size), you lose up to 391 * (128*16)/64k = ~3% the compression factor. For 64k 392 * max_sync_size, the loss would be at most 128/64k = ~0.2%. 393 */ 394 padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf; 395 outbuf += padding; 396 bytesleft -= padding; 397 if (i == 0) 398 /* save offset into first block in header */ 399 hdr->offset = padding + hdrsize; 400 401 if (bytesleft <= 0) { 402 ret = -ENOSPC; 403 goto unlock; 404 } 405 406 /* 407 * NOTE: If the default max_sync_size is changed from 4k 408 * to 64k, remove the "likely" case below, since a 409 * scatterlist will always be needed. 410 */ 411 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { 412 /* Create direct DDE */ 413 op.in = nx842_get_pa((void *)inbuf); 414 op.inlen = max_sync_size; 415 416 } else { 417 /* Create indirect DDE (scatterlist) */ 418 nx842_build_scatterlist(inbuf, max_sync_size, &slin); 419 op.in = nx842_get_pa(slin.entries); 420 op.inlen = -nx842_get_scatterlist_size(&slin); 421 } 422 423 /* 424 * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect 425 * DDE is required for the outbuf. 426 * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must 427 * also be page aligned (1 in 128/4k=32 chance) in order 428 * to use a direct DDE. 429 * This is unlikely, just use an indirect DDE always. 430 */ 431 nx842_build_scatterlist(outbuf, 432 min(bytesleft, max_sync_size), &slout); 433 /* op.out set before loop */ 434 op.outlen = -nx842_get_scatterlist_size(&slout); 435 436 /* Send request to pHyp */ 437 ret = vio_h_cop_sync(local_devdata->vdev, &op); 438 439 /* Check for pHyp error */ 440 if (ret) { 441 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", 442 __func__, ret, op.hcall_err); 443 ret = -EIO; 444 goto unlock; 445 } 446 447 /* Check for hardware error */ 448 ret = nx842_validate_result(dev, &csbcpb->csb); 449 if (ret && ret != -ENOSPC) 450 goto unlock; 451 452 /* Handle incompressible data */ 453 if (unlikely(ret == -ENOSPC)) { 454 if (bytesleft < max_sync_size) { 455 /* 456 * Not enough space left in the output buffer 457 * to store uncompressed block 458 */ 459 goto unlock; 460 } else { 461 /* Store incompressible block */ 462 memcpy((void *)outbuf, (void *)inbuf, 463 max_sync_size); 464 hdr->sizes[i] = -max_sync_size; 465 outbuf += max_sync_size; 466 bytesleft -= max_sync_size; 467 /* Reset ret, incompressible data handled */ 468 ret = 0; 469 } 470 } else { 471 /* Normal case, compression was successful */ 472 size = csbcpb->csb.processed_byte_count; 473 dev_dbg(dev, "%s: processed_bytes=%d\n", 474 __func__, size); 475 hdr->sizes[i] = size; 476 outbuf += size; 477 bytesleft -= size; 478 } 479 480 inbuf += max_sync_size; 481 } 482 483 *outlen = (unsigned int)(outbuf - (unsigned long)out); 484 485 unlock: 486 if (ret) 487 nx842_inc_comp_failed(local_devdata); 488 else { 489 nx842_inc_comp_complete(local_devdata); 490 ibm_nx842_incr_hist(local_devdata->counters->comp_times, 491 (get_tb() - start_time) / tb_ticks_per_usec); 492 } 493 rcu_read_unlock(); 494 return ret; 495 } 496 EXPORT_SYMBOL_GPL(nx842_compress); 497 498 static int sw842_decompress(const unsigned char *, int, unsigned char *, int *, 499 const void *); 500 501 /** 502 * nx842_decompress - Decompress data using the 842 algorithm 503 * 504 * Decompression provide by the NX842 coprocessor on IBM Power systems. 505 * The input buffer is decompressed and the result is stored in the 506 * provided output buffer. The size allocated to the output buffer is 507 * provided by the caller of this function in @outlen. Upon return from 508 * this function @outlen contains the length of the decompressed data. 509 * If there is an error then @outlen will be 0 and an error will be 510 * specified by the return code from this function. 511 * 512 * @in: Pointer to input buffer, will use bounce buffer if not 128 byte 513 * aligned 514 * @inlen: Length of input buffer 515 * @out: Pointer to output buffer, must be page aligned 516 * @outlen: Length of output buffer, must be PAGE_SIZE 517 * @wrkmem: ptr to buffer for working memory, size determined by 518 * nx842_get_workmem_size() 519 * 520 * Returns: 521 * 0 Success, output of length @outlen stored in the buffer at @out 522 * -ENODEV Hardware decompression device is unavailable 523 * -ENOMEM Unable to allocate internal buffers 524 * -ENOSPC Output buffer is to small 525 * -EINVAL Bad input data encountered when attempting decompress 526 * -EIO Internal error 527 */ 528 int nx842_decompress(const unsigned char *in, unsigned int inlen, 529 unsigned char *out, unsigned int *outlen, void *wmem) 530 { 531 struct nx842_header *hdr; 532 struct nx842_devdata *local_devdata; 533 struct device *dev = NULL; 534 struct nx842_workmem *workmem; 535 struct nx842_scatterlist slin, slout; 536 struct nx_csbcpb *csbcpb; 537 int ret = 0, i, size, max_sync_size; 538 unsigned long inbuf, outbuf; 539 struct vio_pfo_op op = { 540 .done = NULL, 541 .handle = 0, 542 .timeout = 0, 543 }; 544 unsigned long start_time = get_tb(); 545 546 /* Ensure page alignment and size */ 547 outbuf = (unsigned long)out; 548 if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE) 549 return -EINVAL; 550 551 rcu_read_lock(); 552 local_devdata = rcu_dereference(devdata); 553 if (local_devdata) 554 dev = local_devdata->dev; 555 556 /* Get header */ 557 hdr = (struct nx842_header *)in; 558 559 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, 560 NX842_HW_PAGE_SIZE); 561 562 inbuf = (unsigned long)in + hdr->offset; 563 if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) { 564 /* Copy block(s) into bounce buffer for alignment */ 565 memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset); 566 inbuf = (unsigned long)workmem->bounce; 567 } 568 569 /* Init scatterlist */ 570 slin.entries = (struct nx842_slentry *)workmem->slin; 571 slout.entries = (struct nx842_slentry *)workmem->slout; 572 573 /* Init operation */ 574 op.flags = NX842_OP_DECOMPRESS; 575 csbcpb = &workmem->csbcpb; 576 memset(csbcpb, 0, sizeof(*csbcpb)); 577 op.csbcpb = nx842_get_pa(csbcpb); 578 579 /* 580 * max_sync_size may have changed since compression, 581 * so we can't read it from the device info. We need 582 * to derive it from hdr->blocks_nr. 583 */ 584 max_sync_size = PAGE_SIZE / hdr->blocks_nr; 585 586 for (i = 0; i < hdr->blocks_nr; i++) { 587 /* Skip padding */ 588 inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN); 589 590 if (hdr->sizes[i] < 0) { 591 /* Negative sizes indicate uncompressed data blocks */ 592 size = abs(hdr->sizes[i]); 593 memcpy((void *)outbuf, (void *)inbuf, size); 594 outbuf += size; 595 inbuf += size; 596 continue; 597 } 598 599 if (!dev) 600 goto sw; 601 602 /* 603 * The better the compression, the more likely the "likely" 604 * case becomes. 605 */ 606 if (likely((inbuf & NX842_HW_PAGE_MASK) == 607 ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) { 608 /* Create direct DDE */ 609 op.in = nx842_get_pa((void *)inbuf); 610 op.inlen = hdr->sizes[i]; 611 } else { 612 /* Create indirect DDE (scatterlist) */ 613 nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin); 614 op.in = nx842_get_pa(slin.entries); 615 op.inlen = -nx842_get_scatterlist_size(&slin); 616 } 617 618 /* 619 * NOTE: If the default max_sync_size is changed from 4k 620 * to 64k, remove the "likely" case below, since a 621 * scatterlist will always be needed. 622 */ 623 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { 624 /* Create direct DDE */ 625 op.out = nx842_get_pa((void *)outbuf); 626 op.outlen = max_sync_size; 627 } else { 628 /* Create indirect DDE (scatterlist) */ 629 nx842_build_scatterlist(outbuf, max_sync_size, &slout); 630 op.out = nx842_get_pa(slout.entries); 631 op.outlen = -nx842_get_scatterlist_size(&slout); 632 } 633 634 /* Send request to pHyp */ 635 ret = vio_h_cop_sync(local_devdata->vdev, &op); 636 637 /* Check for pHyp error */ 638 if (ret) { 639 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", 640 __func__, ret, op.hcall_err); 641 dev = NULL; 642 goto sw; 643 } 644 645 /* Check for hardware error */ 646 ret = nx842_validate_result(dev, &csbcpb->csb); 647 if (ret) { 648 dev = NULL; 649 goto sw; 650 } 651 652 /* HW decompression success */ 653 inbuf += hdr->sizes[i]; 654 outbuf += csbcpb->csb.processed_byte_count; 655 continue; 656 657 sw: 658 /* software decompression */ 659 size = max_sync_size; 660 ret = sw842_decompress( 661 (unsigned char *)inbuf, hdr->sizes[i], 662 (unsigned char *)outbuf, &size, wmem); 663 if (ret) 664 pr_debug("%s: sw842_decompress failed with %d\n", 665 __func__, ret); 666 667 if (ret) { 668 if (ret != -ENOSPC && ret != -EINVAL && 669 ret != -EMSGSIZE) 670 ret = -EIO; 671 goto unlock; 672 } 673 674 /* SW decompression success */ 675 inbuf += hdr->sizes[i]; 676 outbuf += size; 677 } 678 679 *outlen = (unsigned int)(outbuf - (unsigned long)out); 680 681 unlock: 682 if (ret) 683 /* decompress fail */ 684 nx842_inc_decomp_failed(local_devdata); 685 else { 686 if (!dev) 687 /* software decompress */ 688 nx842_inc_swdecomp(local_devdata); 689 nx842_inc_decomp_complete(local_devdata); 690 ibm_nx842_incr_hist(local_devdata->counters->decomp_times, 691 (get_tb() - start_time) / tb_ticks_per_usec); 692 } 693 694 rcu_read_unlock(); 695 return ret; 696 } 697 EXPORT_SYMBOL_GPL(nx842_decompress); 698 699 /** 700 * nx842_OF_set_defaults -- Set default (disabled) values for devdata 701 * 702 * @devdata - struct nx842_devdata to update 703 * 704 * Returns: 705 * 0 on success 706 * -ENOENT if @devdata ptr is NULL 707 */ 708 static int nx842_OF_set_defaults(struct nx842_devdata *devdata) 709 { 710 if (devdata) { 711 devdata->max_sync_size = 0; 712 devdata->max_sync_sg = 0; 713 devdata->max_sg_len = 0; 714 devdata->status = UNAVAILABLE; 715 return 0; 716 } else 717 return -ENOENT; 718 } 719 720 /** 721 * nx842_OF_upd_status -- Update the device info from OF status prop 722 * 723 * The status property indicates if the accelerator is enabled. If the 724 * device is in the OF tree it indicates that the hardware is present. 725 * The status field indicates if the device is enabled when the status 726 * is 'okay'. Otherwise the device driver will be disabled. 727 * 728 * @devdata - struct nx842_devdata to update 729 * @prop - struct property point containing the maxsyncop for the update 730 * 731 * Returns: 732 * 0 - Device is available 733 * -EINVAL - Device is not available 734 */ 735 static int nx842_OF_upd_status(struct nx842_devdata *devdata, 736 struct property *prop) { 737 int ret = 0; 738 const char *status = (const char *)prop->value; 739 740 if (!strncmp(status, "okay", (size_t)prop->length)) { 741 devdata->status = AVAILABLE; 742 } else { 743 dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n", 744 __func__, status); 745 devdata->status = UNAVAILABLE; 746 } 747 748 return ret; 749 } 750 751 /** 752 * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop 753 * 754 * Definition of the 'ibm,max-sg-len' OF property: 755 * This field indicates the maximum byte length of a scatter list 756 * for the platform facility. It is a single cell encoded as with encode-int. 757 * 758 * Example: 759 * # od -x ibm,max-sg-len 760 * 0000000 0000 0ff0 761 * 762 * In this example, the maximum byte length of a scatter list is 763 * 0x0ff0 (4,080). 764 * 765 * @devdata - struct nx842_devdata to update 766 * @prop - struct property point containing the maxsyncop for the update 767 * 768 * Returns: 769 * 0 on success 770 * -EINVAL on failure 771 */ 772 static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, 773 struct property *prop) { 774 int ret = 0; 775 const int *maxsglen = prop->value; 776 777 if (prop->length != sizeof(*maxsglen)) { 778 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); 779 dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, 780 prop->length, sizeof(*maxsglen)); 781 ret = -EINVAL; 782 } else { 783 devdata->max_sg_len = (unsigned int)min(*maxsglen, 784 (int)NX842_HW_PAGE_SIZE); 785 } 786 787 return ret; 788 } 789 790 /** 791 * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop 792 * 793 * Definition of the 'ibm,max-sync-cop' OF property: 794 * Two series of cells. The first series of cells represents the maximums 795 * that can be synchronously compressed. The second series of cells 796 * represents the maximums that can be synchronously decompressed. 797 * 1. The first cell in each series contains the count of the number of 798 * data length, scatter list elements pairs that follow – each being 799 * of the form 800 * a. One cell data byte length 801 * b. One cell total number of scatter list elements 802 * 803 * Example: 804 * # od -x ibm,max-sync-cop 805 * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 806 * 0000020 0000 1000 0000 01fe 807 * 808 * In this example, compression supports 0x1000 (4,096) data byte length 809 * and 0x1fe (510) total scatter list elements. Decompression supports 810 * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list 811 * elements. 812 * 813 * @devdata - struct nx842_devdata to update 814 * @prop - struct property point containing the maxsyncop for the update 815 * 816 * Returns: 817 * 0 on success 818 * -EINVAL on failure 819 */ 820 static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, 821 struct property *prop) { 822 int ret = 0; 823 const struct maxsynccop_t { 824 int comp_elements; 825 int comp_data_limit; 826 int comp_sg_limit; 827 int decomp_elements; 828 int decomp_data_limit; 829 int decomp_sg_limit; 830 } *maxsynccop; 831 832 if (prop->length != sizeof(*maxsynccop)) { 833 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); 834 dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, 835 sizeof(*maxsynccop)); 836 ret = -EINVAL; 837 goto out; 838 } 839 840 maxsynccop = (const struct maxsynccop_t *)prop->value; 841 842 /* Use one limit rather than separate limits for compression and 843 * decompression. Set a maximum for this so as not to exceed the 844 * size that the header can support and round the value down to 845 * the hardware page size (4K) */ 846 devdata->max_sync_size = 847 (unsigned int)min(maxsynccop->comp_data_limit, 848 maxsynccop->decomp_data_limit); 849 850 devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, 851 SIZE_64K); 852 853 if (devdata->max_sync_size < SIZE_4K) { 854 dev_err(devdata->dev, "%s: hardware max data size (%u) is " 855 "less than the driver minimum, unable to use " 856 "the hardware device\n", 857 __func__, devdata->max_sync_size); 858 ret = -EINVAL; 859 goto out; 860 } 861 862 devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit, 863 maxsynccop->decomp_sg_limit); 864 if (devdata->max_sync_sg < 1) { 865 dev_err(devdata->dev, "%s: hardware max sg size (%u) is " 866 "less than the driver minimum, unable to use " 867 "the hardware device\n", 868 __func__, devdata->max_sync_sg); 869 ret = -EINVAL; 870 goto out; 871 } 872 873 out: 874 return ret; 875 } 876 877 /** 878 * 879 * nx842_OF_upd -- Handle OF properties updates for the device. 880 * 881 * Set all properties from the OF tree. Optionally, a new property 882 * can be provided by the @new_prop pointer to overwrite an existing value. 883 * The device will remain disabled until all values are valid, this function 884 * will return an error for updates unless all values are valid. 885 * 886 * @new_prop: If not NULL, this property is being updated. If NULL, update 887 * all properties from the current values in the OF tree. 888 * 889 * Returns: 890 * 0 - Success 891 * -ENOMEM - Could not allocate memory for new devdata structure 892 * -EINVAL - property value not found, new_prop is not a recognized 893 * property for the device or property value is not valid. 894 * -ENODEV - Device is not available 895 */ 896 static int nx842_OF_upd(struct property *new_prop) 897 { 898 struct nx842_devdata *old_devdata = NULL; 899 struct nx842_devdata *new_devdata = NULL; 900 struct device_node *of_node = NULL; 901 struct property *status = NULL; 902 struct property *maxsglen = NULL; 903 struct property *maxsyncop = NULL; 904 int ret = 0; 905 unsigned long flags; 906 907 spin_lock_irqsave(&devdata_mutex, flags); 908 old_devdata = rcu_dereference_check(devdata, 909 lockdep_is_held(&devdata_mutex)); 910 if (old_devdata) 911 of_node = old_devdata->dev->of_node; 912 913 if (!old_devdata || !of_node) { 914 pr_err("%s: device is not available\n", __func__); 915 spin_unlock_irqrestore(&devdata_mutex, flags); 916 return -ENODEV; 917 } 918 919 new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); 920 if (!new_devdata) { 921 dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__); 922 ret = -ENOMEM; 923 goto error_out; 924 } 925 926 memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); 927 new_devdata->counters = old_devdata->counters; 928 929 /* Set ptrs for existing properties */ 930 status = of_find_property(of_node, "status", NULL); 931 maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); 932 maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); 933 if (!status || !maxsglen || !maxsyncop) { 934 dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); 935 ret = -EINVAL; 936 goto error_out; 937 } 938 939 /* Set ptr to new property if provided */ 940 if (new_prop) { 941 /* Single property */ 942 if (!strncmp(new_prop->name, "status", new_prop->length)) { 943 status = new_prop; 944 945 } else if (!strncmp(new_prop->name, "ibm,max-sg-len", 946 new_prop->length)) { 947 maxsglen = new_prop; 948 949 } else if (!strncmp(new_prop->name, "ibm,max-sync-cop", 950 new_prop->length)) { 951 maxsyncop = new_prop; 952 953 } else { 954 /* 955 * Skip the update, the property being updated 956 * has no impact. 957 */ 958 goto out; 959 } 960 } 961 962 /* Perform property updates */ 963 ret = nx842_OF_upd_status(new_devdata, status); 964 if (ret) 965 goto error_out; 966 967 ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); 968 if (ret) 969 goto error_out; 970 971 ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); 972 if (ret) 973 goto error_out; 974 975 out: 976 dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", 977 __func__, new_devdata->max_sync_size, 978 old_devdata->max_sync_size); 979 dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", 980 __func__, new_devdata->max_sync_sg, 981 old_devdata->max_sync_sg); 982 dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", 983 __func__, new_devdata->max_sg_len, 984 old_devdata->max_sg_len); 985 986 rcu_assign_pointer(devdata, new_devdata); 987 spin_unlock_irqrestore(&devdata_mutex, flags); 988 synchronize_rcu(); 989 dev_set_drvdata(new_devdata->dev, new_devdata); 990 kfree(old_devdata); 991 return 0; 992 993 error_out: 994 if (new_devdata) { 995 dev_info(old_devdata->dev, "%s: device disabled\n", __func__); 996 nx842_OF_set_defaults(new_devdata); 997 rcu_assign_pointer(devdata, new_devdata); 998 spin_unlock_irqrestore(&devdata_mutex, flags); 999 synchronize_rcu(); 1000 dev_set_drvdata(new_devdata->dev, new_devdata); 1001 kfree(old_devdata); 1002 } else { 1003 dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); 1004 spin_unlock_irqrestore(&devdata_mutex, flags); 1005 } 1006 1007 if (!ret) 1008 ret = -EINVAL; 1009 return ret; 1010 } 1011 1012 /** 1013 * nx842_OF_notifier - Process updates to OF properties for the device 1014 * 1015 * @np: notifier block 1016 * @action: notifier action 1017 * @update: struct pSeries_reconfig_prop_update pointer if action is 1018 * PSERIES_UPDATE_PROPERTY 1019 * 1020 * Returns: 1021 * NOTIFY_OK on success 1022 * NOTIFY_BAD encoded with error number on failure, use 1023 * notifier_to_errno() to decode this value 1024 */ 1025 static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, 1026 void *update) 1027 { 1028 struct of_prop_reconfig *upd = update; 1029 struct nx842_devdata *local_devdata; 1030 struct device_node *node = NULL; 1031 1032 rcu_read_lock(); 1033 local_devdata = rcu_dereference(devdata); 1034 if (local_devdata) 1035 node = local_devdata->dev->of_node; 1036 1037 if (local_devdata && 1038 action == OF_RECONFIG_UPDATE_PROPERTY && 1039 !strcmp(upd->dn->name, node->name)) { 1040 rcu_read_unlock(); 1041 nx842_OF_upd(upd->prop); 1042 } else 1043 rcu_read_unlock(); 1044 1045 return NOTIFY_OK; 1046 } 1047 1048 static struct notifier_block nx842_of_nb = { 1049 .notifier_call = nx842_OF_notifier, 1050 }; 1051 1052 #define nx842_counter_read(_name) \ 1053 static ssize_t nx842_##_name##_show(struct device *dev, \ 1054 struct device_attribute *attr, \ 1055 char *buf) { \ 1056 struct nx842_devdata *local_devdata; \ 1057 int p = 0; \ 1058 rcu_read_lock(); \ 1059 local_devdata = rcu_dereference(devdata); \ 1060 if (local_devdata) \ 1061 p = snprintf(buf, PAGE_SIZE, "%ld\n", \ 1062 atomic64_read(&local_devdata->counters->_name)); \ 1063 rcu_read_unlock(); \ 1064 return p; \ 1065 } 1066 1067 #define NX842DEV_COUNTER_ATTR_RO(_name) \ 1068 nx842_counter_read(_name); \ 1069 static struct device_attribute dev_attr_##_name = __ATTR(_name, \ 1070 0444, \ 1071 nx842_##_name##_show,\ 1072 NULL); 1073 1074 NX842DEV_COUNTER_ATTR_RO(comp_complete); 1075 NX842DEV_COUNTER_ATTR_RO(comp_failed); 1076 NX842DEV_COUNTER_ATTR_RO(decomp_complete); 1077 NX842DEV_COUNTER_ATTR_RO(decomp_failed); 1078 NX842DEV_COUNTER_ATTR_RO(swdecomp); 1079 1080 static ssize_t nx842_timehist_show(struct device *, 1081 struct device_attribute *, char *); 1082 1083 static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, 1084 nx842_timehist_show, NULL); 1085 static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times, 1086 0444, nx842_timehist_show, NULL); 1087 1088 static ssize_t nx842_timehist_show(struct device *dev, 1089 struct device_attribute *attr, char *buf) { 1090 char *p = buf; 1091 struct nx842_devdata *local_devdata; 1092 atomic64_t *times; 1093 int bytes_remain = PAGE_SIZE; 1094 int bytes; 1095 int i; 1096 1097 rcu_read_lock(); 1098 local_devdata = rcu_dereference(devdata); 1099 if (!local_devdata) { 1100 rcu_read_unlock(); 1101 return 0; 1102 } 1103 1104 if (attr == &dev_attr_comp_times) 1105 times = local_devdata->counters->comp_times; 1106 else if (attr == &dev_attr_decomp_times) 1107 times = local_devdata->counters->decomp_times; 1108 else { 1109 rcu_read_unlock(); 1110 return 0; 1111 } 1112 1113 for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { 1114 bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n", 1115 i ? (2<<(i-1)) : 0, (2<<i)-1, 1116 atomic64_read(×[i])); 1117 bytes_remain -= bytes; 1118 p += bytes; 1119 } 1120 /* The last bucket holds everything over 1121 * 2<<(NX842_HIST_SLOTS - 2) us */ 1122 bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n", 1123 2<<(NX842_HIST_SLOTS - 2), 1124 atomic64_read(×[(NX842_HIST_SLOTS - 1)])); 1125 p += bytes; 1126 1127 rcu_read_unlock(); 1128 return p - buf; 1129 } 1130 1131 static struct attribute *nx842_sysfs_entries[] = { 1132 &dev_attr_comp_complete.attr, 1133 &dev_attr_comp_failed.attr, 1134 &dev_attr_decomp_complete.attr, 1135 &dev_attr_decomp_failed.attr, 1136 &dev_attr_swdecomp.attr, 1137 &dev_attr_comp_times.attr, 1138 &dev_attr_decomp_times.attr, 1139 NULL, 1140 }; 1141 1142 static struct attribute_group nx842_attribute_group = { 1143 .name = NULL, /* put in device directory */ 1144 .attrs = nx842_sysfs_entries, 1145 }; 1146 1147 static int __init nx842_probe(struct vio_dev *viodev, 1148 const struct vio_device_id *id) 1149 { 1150 struct nx842_devdata *old_devdata, *new_devdata = NULL; 1151 unsigned long flags; 1152 int ret = 0; 1153 1154 spin_lock_irqsave(&devdata_mutex, flags); 1155 old_devdata = rcu_dereference_check(devdata, 1156 lockdep_is_held(&devdata_mutex)); 1157 1158 if (old_devdata && old_devdata->vdev != NULL) { 1159 dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); 1160 ret = -1; 1161 goto error_unlock; 1162 } 1163 1164 dev_set_drvdata(&viodev->dev, NULL); 1165 1166 new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); 1167 if (!new_devdata) { 1168 dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__); 1169 ret = -ENOMEM; 1170 goto error_unlock; 1171 } 1172 1173 new_devdata->counters = kzalloc(sizeof(*new_devdata->counters), 1174 GFP_NOFS); 1175 if (!new_devdata->counters) { 1176 dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__); 1177 ret = -ENOMEM; 1178 goto error_unlock; 1179 } 1180 1181 new_devdata->vdev = viodev; 1182 new_devdata->dev = &viodev->dev; 1183 nx842_OF_set_defaults(new_devdata); 1184 1185 rcu_assign_pointer(devdata, new_devdata); 1186 spin_unlock_irqrestore(&devdata_mutex, flags); 1187 synchronize_rcu(); 1188 kfree(old_devdata); 1189 1190 of_reconfig_notifier_register(&nx842_of_nb); 1191 1192 ret = nx842_OF_upd(NULL); 1193 if (ret && ret != -ENODEV) { 1194 dev_err(&viodev->dev, "could not parse device tree. %d\n", ret); 1195 ret = -1; 1196 goto error; 1197 } 1198 1199 rcu_read_lock(); 1200 if (dev_set_drvdata(&viodev->dev, rcu_dereference(devdata))) { 1201 rcu_read_unlock(); 1202 dev_err(&viodev->dev, "failed to set driver data for device\n"); 1203 ret = -1; 1204 goto error; 1205 } 1206 rcu_read_unlock(); 1207 1208 if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { 1209 dev_err(&viodev->dev, "could not create sysfs device attributes\n"); 1210 ret = -1; 1211 goto error; 1212 } 1213 1214 return 0; 1215 1216 error_unlock: 1217 spin_unlock_irqrestore(&devdata_mutex, flags); 1218 if (new_devdata) 1219 kfree(new_devdata->counters); 1220 kfree(new_devdata); 1221 error: 1222 return ret; 1223 } 1224 1225 static int __exit nx842_remove(struct vio_dev *viodev) 1226 { 1227 struct nx842_devdata *old_devdata; 1228 unsigned long flags; 1229 1230 pr_info("Removing IBM Power 842 compression device\n"); 1231 sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); 1232 1233 spin_lock_irqsave(&devdata_mutex, flags); 1234 old_devdata = rcu_dereference_check(devdata, 1235 lockdep_is_held(&devdata_mutex)); 1236 of_reconfig_notifier_unregister(&nx842_of_nb); 1237 rcu_assign_pointer(devdata, NULL); 1238 spin_unlock_irqrestore(&devdata_mutex, flags); 1239 synchronize_rcu(); 1240 dev_set_drvdata(&viodev->dev, NULL); 1241 if (old_devdata) 1242 kfree(old_devdata->counters); 1243 kfree(old_devdata); 1244 return 0; 1245 } 1246 1247 static struct vio_device_id nx842_driver_ids[] = { 1248 {"ibm,compression-v1", "ibm,compression"}, 1249 {"", ""}, 1250 }; 1251 1252 static struct vio_driver nx842_driver = { 1253 .name = MODULE_NAME, 1254 .probe = nx842_probe, 1255 .remove = nx842_remove, 1256 .get_desired_dma = nx842_get_desired_dma, 1257 .id_table = nx842_driver_ids, 1258 }; 1259 1260 static int __init nx842_init(void) 1261 { 1262 struct nx842_devdata *new_devdata; 1263 pr_info("Registering IBM Power 842 compression driver\n"); 1264 1265 RCU_INIT_POINTER(devdata, NULL); 1266 new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); 1267 if (!new_devdata) { 1268 pr_err("Could not allocate memory for device data\n"); 1269 return -ENOMEM; 1270 } 1271 new_devdata->status = UNAVAILABLE; 1272 RCU_INIT_POINTER(devdata, new_devdata); 1273 1274 return vio_register_driver(&nx842_driver); 1275 } 1276 1277 module_init(nx842_init); 1278 1279 static void __exit nx842_exit(void) 1280 { 1281 struct nx842_devdata *old_devdata; 1282 unsigned long flags; 1283 1284 pr_info("Exiting IBM Power 842 compression driver\n"); 1285 spin_lock_irqsave(&devdata_mutex, flags); 1286 old_devdata = rcu_dereference_check(devdata, 1287 lockdep_is_held(&devdata_mutex)); 1288 rcu_assign_pointer(devdata, NULL); 1289 spin_unlock_irqrestore(&devdata_mutex, flags); 1290 synchronize_rcu(); 1291 if (old_devdata) 1292 dev_set_drvdata(old_devdata->dev, NULL); 1293 kfree(old_devdata); 1294 vio_unregister_driver(&nx842_driver); 1295 } 1296 1297 module_exit(nx842_exit); 1298 1299 /********************************* 1300 * 842 software decompressor 1301 *********************************/ 1302 typedef int (*sw842_template_op)(const char **, int *, unsigned char **, 1303 struct sw842_fifo *); 1304 1305 static int sw842_data8(const char **, int *, unsigned char **, 1306 struct sw842_fifo *); 1307 static int sw842_data4(const char **, int *, unsigned char **, 1308 struct sw842_fifo *); 1309 static int sw842_data2(const char **, int *, unsigned char **, 1310 struct sw842_fifo *); 1311 static int sw842_ptr8(const char **, int *, unsigned char **, 1312 struct sw842_fifo *); 1313 static int sw842_ptr4(const char **, int *, unsigned char **, 1314 struct sw842_fifo *); 1315 static int sw842_ptr2(const char **, int *, unsigned char **, 1316 struct sw842_fifo *); 1317 1318 /* special templates */ 1319 #define SW842_TMPL_REPEAT 0x1B 1320 #define SW842_TMPL_ZEROS 0x1C 1321 #define SW842_TMPL_EOF 0x1E 1322 1323 static sw842_template_op sw842_tmpl_ops[26][4] = { 1324 { sw842_data8, NULL}, /* 0 (00000) */ 1325 { sw842_data4, sw842_data2, sw842_ptr2, NULL}, 1326 { sw842_data4, sw842_ptr2, sw842_data2, NULL}, 1327 { sw842_data4, sw842_ptr2, sw842_ptr2, NULL}, 1328 { sw842_data4, sw842_ptr4, NULL}, 1329 { sw842_data2, sw842_ptr2, sw842_data4, NULL}, 1330 { sw842_data2, sw842_ptr2, sw842_data2, sw842_ptr2}, 1331 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_data2}, 1332 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_ptr2,}, 1333 { sw842_data2, sw842_ptr2, sw842_ptr4, NULL}, 1334 { sw842_ptr2, sw842_data2, sw842_data4, NULL}, /* 10 (01010) */ 1335 { sw842_ptr2, sw842_data4, sw842_ptr2, NULL}, 1336 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_data2}, 1337 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_ptr2}, 1338 { sw842_ptr2, sw842_data2, sw842_ptr4, NULL}, 1339 { sw842_ptr2, sw842_ptr2, sw842_data4, NULL}, 1340 { sw842_ptr2, sw842_ptr2, sw842_data2, sw842_ptr2}, 1341 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_data2}, 1342 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_ptr2}, 1343 { sw842_ptr2, sw842_ptr2, sw842_ptr4, NULL}, 1344 { sw842_ptr4, sw842_data4, NULL}, /* 20 (10100) */ 1345 { sw842_ptr4, sw842_data2, sw842_ptr2, NULL}, 1346 { sw842_ptr4, sw842_ptr2, sw842_data2, NULL}, 1347 { sw842_ptr4, sw842_ptr2, sw842_ptr2, NULL}, 1348 { sw842_ptr4, sw842_ptr4, NULL}, 1349 { sw842_ptr8, NULL} 1350 }; 1351 1352 /* Software decompress helpers */ 1353 1354 static uint8_t sw842_get_byte(const char *buf, int bit) 1355 { 1356 uint8_t tmpl; 1357 uint16_t tmp; 1358 tmp = htons(*(uint16_t *)(buf)); 1359 tmp = (uint16_t)(tmp << bit); 1360 tmp = ntohs(tmp); 1361 memcpy(&tmpl, &tmp, 1); 1362 return tmpl; 1363 } 1364 1365 static uint8_t sw842_get_template(const char **buf, int *bit) 1366 { 1367 uint8_t byte; 1368 byte = sw842_get_byte(*buf, *bit); 1369 byte = byte >> 3; 1370 byte &= 0x1F; 1371 *buf += (*bit + 5) / 8; 1372 *bit = (*bit + 5) % 8; 1373 return byte; 1374 } 1375 1376 /* repeat_count happens to be 5-bit too (like the template) */ 1377 static uint8_t sw842_get_repeat_count(const char **buf, int *bit) 1378 { 1379 uint8_t byte; 1380 byte = sw842_get_byte(*buf, *bit); 1381 byte = byte >> 2; 1382 byte &= 0x3F; 1383 *buf += (*bit + 6) / 8; 1384 *bit = (*bit + 6) % 8; 1385 return byte; 1386 } 1387 1388 static uint8_t sw842_get_ptr2(const char **buf, int *bit) 1389 { 1390 uint8_t ptr; 1391 ptr = sw842_get_byte(*buf, *bit); 1392 (*buf)++; 1393 return ptr; 1394 } 1395 1396 static uint16_t sw842_get_ptr4(const char **buf, int *bit, 1397 struct sw842_fifo *fifo) 1398 { 1399 uint16_t ptr; 1400 ptr = htons(*(uint16_t *)(*buf)); 1401 ptr = (uint16_t)(ptr << *bit); 1402 ptr = ptr >> 7; 1403 ptr &= 0x01FF; 1404 *buf += (*bit + 9) / 8; 1405 *bit = (*bit + 9) % 8; 1406 return ptr; 1407 } 1408 1409 static uint8_t sw842_get_ptr8(const char **buf, int *bit, 1410 struct sw842_fifo *fifo) 1411 { 1412 return sw842_get_ptr2(buf, bit); 1413 } 1414 1415 /* Software decompress template ops */ 1416 1417 static int sw842_data8(const char **inbuf, int *inbit, 1418 unsigned char **outbuf, struct sw842_fifo *fifo) 1419 { 1420 int ret; 1421 1422 ret = sw842_data4(inbuf, inbit, outbuf, fifo); 1423 if (ret) 1424 return ret; 1425 ret = sw842_data4(inbuf, inbit, outbuf, fifo); 1426 return ret; 1427 } 1428 1429 static int sw842_data4(const char **inbuf, int *inbit, 1430 unsigned char **outbuf, struct sw842_fifo *fifo) 1431 { 1432 int ret; 1433 1434 ret = sw842_data2(inbuf, inbit, outbuf, fifo); 1435 if (ret) 1436 return ret; 1437 ret = sw842_data2(inbuf, inbit, outbuf, fifo); 1438 return ret; 1439 } 1440 1441 static int sw842_data2(const char **inbuf, int *inbit, 1442 unsigned char **outbuf, struct sw842_fifo *fifo) 1443 { 1444 **outbuf = sw842_get_byte(*inbuf, *inbit); 1445 (*inbuf)++; 1446 (*outbuf)++; 1447 **outbuf = sw842_get_byte(*inbuf, *inbit); 1448 (*inbuf)++; 1449 (*outbuf)++; 1450 return 0; 1451 } 1452 1453 static int sw842_ptr8(const char **inbuf, int *inbit, 1454 unsigned char **outbuf, struct sw842_fifo *fifo) 1455 { 1456 uint8_t ptr; 1457 ptr = sw842_get_ptr8(inbuf, inbit, fifo); 1458 if (!fifo->f84_full && (ptr >= fifo->f8_count)) 1459 return 1; 1460 memcpy(*outbuf, fifo->f8[ptr], 8); 1461 *outbuf += 8; 1462 return 0; 1463 } 1464 1465 static int sw842_ptr4(const char **inbuf, int *inbit, 1466 unsigned char **outbuf, struct sw842_fifo *fifo) 1467 { 1468 uint16_t ptr; 1469 ptr = sw842_get_ptr4(inbuf, inbit, fifo); 1470 if (!fifo->f84_full && (ptr >= fifo->f4_count)) 1471 return 1; 1472 memcpy(*outbuf, fifo->f4[ptr], 4); 1473 *outbuf += 4; 1474 return 0; 1475 } 1476 1477 static int sw842_ptr2(const char **inbuf, int *inbit, 1478 unsigned char **outbuf, struct sw842_fifo *fifo) 1479 { 1480 uint8_t ptr; 1481 ptr = sw842_get_ptr2(inbuf, inbit); 1482 if (!fifo->f2_full && (ptr >= fifo->f2_count)) 1483 return 1; 1484 memcpy(*outbuf, fifo->f2[ptr], 2); 1485 *outbuf += 2; 1486 return 0; 1487 } 1488 1489 static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo) 1490 { 1491 unsigned char initial_f2count = fifo->f2_count; 1492 1493 memcpy(fifo->f8[fifo->f8_count], buf, 8); 1494 fifo->f4_count += 2; 1495 fifo->f8_count += 1; 1496 1497 if (!fifo->f84_full && fifo->f4_count >= 512) { 1498 fifo->f84_full = 1; 1499 fifo->f4_count /= 512; 1500 } 1501 1502 memcpy(fifo->f2[fifo->f2_count++], buf, 2); 1503 memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2); 1504 memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2); 1505 memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2); 1506 if (fifo->f2_count < initial_f2count) 1507 fifo->f2_full = 1; 1508 } 1509 1510 static int sw842_decompress(const unsigned char *src, int srclen, 1511 unsigned char *dst, int *destlen, 1512 const void *wrkmem) 1513 { 1514 uint8_t tmpl; 1515 const char *inbuf; 1516 int inbit = 0; 1517 unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf; 1518 const char *inbuf_end; 1519 sw842_template_op op; 1520 int opindex; 1521 int i, repeat_count; 1522 struct sw842_fifo *fifo; 1523 int ret = 0; 1524 1525 fifo = &((struct nx842_workmem *)(wrkmem))->swfifo; 1526 memset(fifo, 0, sizeof(*fifo)); 1527 1528 origbuf = NULL; 1529 inbuf = src; 1530 inbuf_end = src + srclen; 1531 outbuf = dst; 1532 outbuf_end = dst + *destlen; 1533 1534 while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) { 1535 if (inbuf >= inbuf_end) { 1536 ret = -EINVAL; 1537 goto out; 1538 } 1539 1540 opindex = 0; 1541 prevbuf = origbuf; 1542 origbuf = outbuf; 1543 switch (tmpl) { 1544 case SW842_TMPL_REPEAT: 1545 if (prevbuf == NULL) { 1546 ret = -EINVAL; 1547 goto out; 1548 } 1549 1550 repeat_count = sw842_get_repeat_count(&inbuf, 1551 &inbit) + 1; 1552 1553 /* Did the repeat count advance past the end of input */ 1554 if (inbuf > inbuf_end) { 1555 ret = -EINVAL; 1556 goto out; 1557 } 1558 1559 for (i = 0; i < repeat_count; i++) { 1560 /* Would this overflow the output buffer */ 1561 if ((outbuf + 8) > outbuf_end) { 1562 ret = -ENOSPC; 1563 goto out; 1564 } 1565 1566 memcpy(outbuf, prevbuf, 8); 1567 sw842_copy_to_fifo(outbuf, fifo); 1568 outbuf += 8; 1569 } 1570 break; 1571 1572 case SW842_TMPL_ZEROS: 1573 /* Would this overflow the output buffer */ 1574 if ((outbuf + 8) > outbuf_end) { 1575 ret = -ENOSPC; 1576 goto out; 1577 } 1578 1579 memset(outbuf, 0, 8); 1580 sw842_copy_to_fifo(outbuf, fifo); 1581 outbuf += 8; 1582 break; 1583 1584 default: 1585 if (tmpl > 25) { 1586 ret = -EINVAL; 1587 goto out; 1588 } 1589 1590 /* Does this go past the end of the input buffer */ 1591 if ((inbuf + 2) > inbuf_end) { 1592 ret = -EINVAL; 1593 goto out; 1594 } 1595 1596 /* Would this overflow the output buffer */ 1597 if ((outbuf + 8) > outbuf_end) { 1598 ret = -ENOSPC; 1599 goto out; 1600 } 1601 1602 while (opindex < 4 && 1603 (op = sw842_tmpl_ops[tmpl][opindex++]) 1604 != NULL) { 1605 ret = (*op)(&inbuf, &inbit, &outbuf, fifo); 1606 if (ret) { 1607 ret = -EINVAL; 1608 goto out; 1609 } 1610 sw842_copy_to_fifo(origbuf, fifo); 1611 } 1612 } 1613 } 1614 1615 out: 1616 if (!ret) 1617 *destlen = (unsigned int)(outbuf - dst); 1618 else 1619 *destlen = 0; 1620 1621 return ret; 1622 } 1623