xref: /linux/drivers/crypto/nx/nx-842.c (revision 08ec212c0f92cbf30e3ecc7349f18151714041d6)
1 /*
2  * Driver for IBM Power 842 compression accelerator
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17  *
18  * Copyright (C) IBM Corporation, 2012
19  *
20  * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
21  *          Seth Jennings <sjenning@linux.vnet.ibm.com>
22  */
23 
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/nx842.h>
27 #include <linux/of.h>
28 #include <linux/slab.h>
29 
30 #include <asm/page.h>
31 #include <asm/pSeries_reconfig.h>
32 #include <asm/vio.h>
33 
34 #include "nx_csbcpb.h" /* struct nx_csbcpb */
35 
36 #define MODULE_NAME "nx-compress"
37 MODULE_LICENSE("GPL");
38 MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
39 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
40 
/* Shift counts and byte sizes for the 4K and 64K granularities used below */
#define SHIFT_4K	12
#define SIZE_4K		(1UL << SHIFT_4K)
#define SHIFT_64K	16
#define SIZE_64K	(1UL << SHIFT_64K)

/* Alignment, in bytes, that IO buffers must satisfy */
#define IO_BUFFER_ALIGN	128
48 
/*
 * Header placed at the start of a compressed buffer.  It is followed
 * directly by one sizes[] entry per block; nx842_compress() stores a
 * negative size for a block that was copied uncompressed.
 */
struct nx842_header {
	int blocks_nr; /* number of compressed blocks */
	int offset; /* offset of the first block (from beginning of header) */
	int sizes[]; /* size of compressed blocks (flexible array member) */
};

/*
 * nx842_header_size - size in bytes of a header including its sizes[] array
 * @hdr: header whose blocks_nr field is already set
 */
static inline int nx842_header_size(const struct nx842_header *hdr)
{
	return sizeof(struct nx842_header) +
			hdr->blocks_nr * sizeof(hdr->sizes[0]);
}
60 
/* Macros for fields within nx_csbcpb */
/*
 * Check the valid bit within the csbcpb valid field.
 * The argument is parenthesized so that compound expressions such as
 * (a | b) are masked as a whole.
 */
#define NX842_CSBCBP_VALID_CHK(x) ((x) & BIT_MASK(7))

/* CE macros operate on the completion_extension field bits in the csbcpb.
 * CE0 0=full completion, 1=partial completion
 * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
 * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
#define NX842_CSBCPB_CE0(x)	((x) & BIT_MASK(7))
#define NX842_CSBCPB_CE1(x)	((x) & BIT_MASK(6))
#define NX842_CSBCPB_CE2(x)	((x) & BIT_MASK(5))

/* The NX unit accepts data only on 4K page boundaries */
#define NX842_HW_PAGE_SHIFT	SHIFT_4K
#define NX842_HW_PAGE_SIZE	(ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
#define NX842_HW_PAGE_MASK	(~(NX842_HW_PAGE_SIZE-1))
77 
/* Availability state stored in struct nx842_devdata.status */
enum nx842_status {
	UNAVAILABLE = 0,
	AVAILABLE = 1,
};
82 
83 struct ibm_nx842_counters {
84 	atomic64_t comp_complete;
85 	atomic64_t comp_failed;
86 	atomic64_t decomp_complete;
87 	atomic64_t decomp_failed;
88 	atomic64_t swdecomp;
89 	atomic64_t comp_times[32];
90 	atomic64_t decomp_times[32];
91 };
92 
93 static struct nx842_devdata {
94 	struct vio_dev *vdev;
95 	struct device *dev;
96 	struct ibm_nx842_counters *counters;
97 	unsigned int max_sg_len;
98 	unsigned int max_sync_size;
99 	unsigned int max_sync_sg;
100 	enum nx842_status status;
101 } __rcu *devdata;
102 static DEFINE_SPINLOCK(devdata_mutex);
103 
104 #define NX842_COUNTER_INC(_x) \
105 static inline void nx842_inc_##_x( \
106 	const struct nx842_devdata *dev) { \
107 	if (dev) \
108 		atomic64_inc(&dev->counters->_x); \
109 }
110 NX842_COUNTER_INC(comp_complete);
111 NX842_COUNTER_INC(comp_failed);
112 NX842_COUNTER_INC(decomp_complete);
113 NX842_COUNTER_INC(decomp_failed);
114 NX842_COUNTER_INC(swdecomp);
115 
116 #define NX842_HIST_SLOTS 16
117 
118 static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time)
119 {
120 	int bucket = fls(time);
121 
122 	if (bucket)
123 		bucket = min((NX842_HIST_SLOTS - 1), bucket - 1);
124 
125 	atomic64_inc(&times[bucket]);
126 }
127 
/* NX unit operation flags */
#define NX842_OP_COMPRESS	0x0
#define NX842_OP_CRC		0x1
#define NX842_OP_DECOMPRESS	0x2
#define NX842_OP_COMPRESS_CRC   (NX842_OP_COMPRESS | NX842_OP_CRC)
#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC)
#define NX842_OP_ASYNC		(1<<23)
#define NX842_OP_NOTIFY		(1<<22)
/*
 * Places the low 8 bits of x in bits 8-15.  The argument is parenthesized
 * so that an expression such as (a | b) is masked as a whole.
 */
#define NX842_OP_NOTIFY_INT(x)	(((x) & 0xff)<<8)
137 
/*
 * nx842_get_desired_dma - amount of DMA space wanted for @viodev
 *
 * This driver creates no DMA mappings, so the answer is always zero.
 */
static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
{
	const unsigned long no_dma_needed = 0;

	return no_dma_needed;
}
143 
/* One scatterlist element: the start and length of a memory region */
struct nx842_slentry {
	unsigned long ptr; /* real address of the region, i.e. __pa() of it */
	unsigned long len; /* length of the region */
};

/* pHyp scatterlist: an slentry array plus the number of entries in use */
struct nx842_scatterlist {
	int entry_nr; /* how many slentries are populated */
	struct nx842_slentry *entries; /* start of the slentry array */
};

/*
 * nx842_get_scatterlist_size - bytes occupied by the populated slentries
 *
 * Only the entries themselves are counted; sizeof(entry_nr) is excluded.
 */
static inline unsigned long nx842_get_scatterlist_size(
				struct nx842_scatterlist *sl)
{
	const unsigned long per_entry = sizeof(struct nx842_slentry);

	return sl->entry_nr * per_entry;
}
161 
162 static int nx842_build_scatterlist(unsigned long buf, int len,
163 			struct nx842_scatterlist *sl)
164 {
165 	unsigned long nextpage;
166 	struct nx842_slentry *entry;
167 
168 	sl->entry_nr = 0;
169 
170 	entry = sl->entries;
171 	while (len) {
172 		entry->ptr = __pa(buf);
173 		nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE);
174 		if (nextpage < buf + len) {
175 			/* we aren't at the end yet */
176 			if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE))
177 				/* we are in the middle (or beginning) */
178 				entry->len = NX842_HW_PAGE_SIZE;
179 			else
180 				/* we are at the beginning */
181 				entry->len = nextpage - buf;
182 		} else {
183 			/* at the end */
184 			entry->len = len;
185 		}
186 
187 		len -= entry->len;
188 		buf += entry->len;
189 		sl->entry_nr++;
190 		entry++;
191 	}
192 
193 	return 0;
194 }
195 
/*
 * Working memory for software decompression
 *
 * NOTE(review): the code that fills and reads these members is not part of
 * this section; the notes below restate only what the declarations show.
 */
struct sw842_fifo {
	union {
		char f8[256][8];	/* 256 entries of 8 bytes */
		char f4[512][4];	/* 512 entries of 4 bytes, same storage as f8 */
	};
	char f2[256][2];		/* 256 entries of 2 bytes */
	unsigned char f84_full;
	unsigned char f2_full;
	unsigned char f8_count;
	unsigned char f2_count;
	unsigned int f4_count;
};
211 
212 /*
213  * Working memory for crypto API
214  */
215 struct nx842_workmem {
216 	char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */
217 	union {
218 		/* hardware working memory */
219 		struct {
220 			/* scatterlist */
221 			char slin[SIZE_4K];
222 			char slout[SIZE_4K];
223 			/* coprocessor status/parameter block */
224 			struct nx_csbcpb csbcpb;
225 		};
226 		/* software working memory */
227 		struct sw842_fifo swfifo; /* software decompression fifo */
228 	};
229 };
230 
231 int nx842_get_workmem_size(void)
232 {
233 	return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE;
234 }
235 EXPORT_SYMBOL_GPL(nx842_get_workmem_size);
236 
237 int nx842_get_workmem_size_aligned(void)
238 {
239 	return sizeof(struct nx842_workmem);
240 }
241 EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned);
242 
243 static int nx842_validate_result(struct device *dev,
244 	struct cop_status_block *csb)
245 {
246 	/* The csb must be valid after returning from vio_h_cop_sync */
247 	if (!NX842_CSBCBP_VALID_CHK(csb->valid)) {
248 		dev_err(dev, "%s: cspcbp not valid upon completion.\n",
249 				__func__);
250 		dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
251 				csb->valid,
252 				csb->crb_seq_number,
253 				csb->completion_code,
254 				csb->completion_extension);
255 		dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n",
256 				csb->processed_byte_count,
257 				(unsigned long)csb->address);
258 		return -EIO;
259 	}
260 
261 	/* Check return values from the hardware in the CSB */
262 	switch (csb->completion_code) {
263 	case 0:	/* Completed without error */
264 		break;
265 	case 64: /* Target bytes > Source bytes during compression */
266 	case 13: /* Output buffer too small */
267 		dev_dbg(dev, "%s: Compression output larger than input\n",
268 					__func__);
269 		return -ENOSPC;
270 	case 66: /* Input data contains an illegal template field */
271 	case 67: /* Template indicates data past the end of the input stream */
272 		dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
273 					__func__, csb->completion_code);
274 		return -EINVAL;
275 	default:
276 		dev_dbg(dev, "%s: Unspecified error (code:%d)\n",
277 					__func__, csb->completion_code);
278 		return -EIO;
279 	}
280 
281 	/* Hardware sanity check */
282 	if (!NX842_CSBCPB_CE2(csb->completion_extension)) {
283 		dev_err(dev, "%s: No error returned by hardware, but "
284 				"data returned is unusable, contact support.\n"
285 				"(Additional info: csbcbp->processed bytes "
286 				"does not specify processed bytes for the "
287 				"target buffer.)\n", __func__);
288 		return -EIO;
289 	}
290 
291 	return 0;
292 }
293 
294 /**
295  * nx842_compress - Compress data using the 842 algorithm
296  *
297  * Compression provide by the NX842 coprocessor on IBM Power systems.
298  * The input buffer is compressed and the result is stored in the
299  * provided output buffer.
300  *
301  * Upon return from this function @outlen contains the length of the
302  * compressed data.  If there is an error then @outlen will be 0 and an
303  * error will be specified by the return code from this function.
304  *
305  * @in: Pointer to input buffer, must be page aligned
306  * @inlen: Length of input buffer, must be PAGE_SIZE
307  * @out: Pointer to output buffer
308  * @outlen: Length of output buffer
309  * @wrkmem: ptr to buffer for working memory, size determined by
310  *          nx842_get_workmem_size()
311  *
312  * Returns:
313  *   0		Success, output of length @outlen stored in the buffer at @out
314  *   -ENOMEM	Unable to allocate internal buffers
315  *   -ENOSPC	Output buffer is to small
316  *   -EMSGSIZE	XXX Difficult to describe this limitation
317  *   -EIO	Internal error
318  *   -ENODEV	Hardware unavailable
319  */
320 int nx842_compress(const unsigned char *in, unsigned int inlen,
321 		       unsigned char *out, unsigned int *outlen, void *wmem)
322 {
323 	struct nx842_header *hdr;
324 	struct nx842_devdata *local_devdata;
325 	struct device *dev = NULL;
326 	struct nx842_workmem *workmem;
327 	struct nx842_scatterlist slin, slout;
328 	struct nx_csbcpb *csbcpb;
329 	int ret = 0, max_sync_size, i, bytesleft, size, hdrsize;
330 	unsigned long inbuf, outbuf, padding;
331 	struct vio_pfo_op op = {
332 		.done = NULL,
333 		.handle = 0,
334 		.timeout = 0,
335 	};
336 	unsigned long start_time = get_tb();
337 
338 	/*
339 	 * Make sure input buffer is 64k page aligned.  This is assumed since
340 	 * this driver is designed for page compression only (for now).  This
341 	 * is very nice since we can now use direct DDE(s) for the input and
342 	 * the alignment is guaranteed.
343 	*/
344 	inbuf = (unsigned long)in;
345 	if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE)
346 		return -EINVAL;
347 
348 	rcu_read_lock();
349 	local_devdata = rcu_dereference(devdata);
350 	if (!local_devdata || !local_devdata->dev) {
351 		rcu_read_unlock();
352 		return -ENODEV;
353 	}
354 	max_sync_size = local_devdata->max_sync_size;
355 	dev = local_devdata->dev;
356 
357 	/* Create the header */
358 	hdr = (struct nx842_header *)out;
359 	hdr->blocks_nr = PAGE_SIZE / max_sync_size;
360 	hdrsize = nx842_header_size(hdr);
361 	outbuf = (unsigned long)out + hdrsize;
362 	bytesleft = *outlen - hdrsize;
363 
364 	/* Init scatterlist */
365 	workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
366 		NX842_HW_PAGE_SIZE);
367 	slin.entries = (struct nx842_slentry *)workmem->slin;
368 	slout.entries = (struct nx842_slentry *)workmem->slout;
369 
370 	/* Init operation */
371 	op.flags = NX842_OP_COMPRESS;
372 	csbcpb = &workmem->csbcpb;
373 	memset(csbcpb, 0, sizeof(*csbcpb));
374 	op.csbcpb = __pa(csbcpb);
375 	op.out = __pa(slout.entries);
376 
377 	for (i = 0; i < hdr->blocks_nr; i++) {
378 		/*
379 		 * Aligning the output blocks to 128 bytes does waste space,
380 		 * but it prevents the need for bounce buffers and memory
381 		 * copies.  It also simplifies the code a lot.  In the worst
382 		 * case (64k page, 4k max_sync_size), you lose up to
383 		 * (128*16)/64k = ~3% the compression factor. For 64k
384 		 * max_sync_size, the loss would be at most 128/64k = ~0.2%.
385 		 */
386 		padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf;
387 		outbuf += padding;
388 		bytesleft -= padding;
389 		if (i == 0)
390 			/* save offset into first block in header */
391 			hdr->offset = padding + hdrsize;
392 
393 		if (bytesleft <= 0) {
394 			ret = -ENOSPC;
395 			goto unlock;
396 		}
397 
398 		/*
399 		 * NOTE: If the default max_sync_size is changed from 4k
400 		 * to 64k, remove the "likely" case below, since a
401 		 * scatterlist will always be needed.
402 		 */
403 		if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
404 			/* Create direct DDE */
405 			op.in = __pa(inbuf);
406 			op.inlen = max_sync_size;
407 
408 		} else {
409 			/* Create indirect DDE (scatterlist) */
410 			nx842_build_scatterlist(inbuf, max_sync_size, &slin);
411 			op.in = __pa(slin.entries);
412 			op.inlen = -nx842_get_scatterlist_size(&slin);
413 		}
414 
415 		/*
416 		 * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect
417 		 * DDE is required for the outbuf.
418 		 * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must
419 		 * also be page aligned (1 in 128/4k=32 chance) in order
420 		 * to use a direct DDE.
421 		 * This is unlikely, just use an indirect DDE always.
422 		 */
423 		nx842_build_scatterlist(outbuf,
424 			min(bytesleft, max_sync_size), &slout);
425 		/* op.out set before loop */
426 		op.outlen = -nx842_get_scatterlist_size(&slout);
427 
428 		/* Send request to pHyp */
429 		ret = vio_h_cop_sync(local_devdata->vdev, &op);
430 
431 		/* Check for pHyp error */
432 		if (ret) {
433 			dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
434 				__func__, ret, op.hcall_err);
435 			ret = -EIO;
436 			goto unlock;
437 		}
438 
439 		/* Check for hardware error */
440 		ret = nx842_validate_result(dev, &csbcpb->csb);
441 		if (ret && ret != -ENOSPC)
442 			goto unlock;
443 
444 		/* Handle incompressible data */
445 		if (unlikely(ret == -ENOSPC)) {
446 			if (bytesleft < max_sync_size) {
447 				/*
448 				 * Not enough space left in the output buffer
449 				 * to store uncompressed block
450 				 */
451 				goto unlock;
452 			} else {
453 				/* Store incompressible block */
454 				memcpy((void *)outbuf, (void *)inbuf,
455 					max_sync_size);
456 				hdr->sizes[i] = -max_sync_size;
457 				outbuf += max_sync_size;
458 				bytesleft -= max_sync_size;
459 				/* Reset ret, incompressible data handled */
460 				ret = 0;
461 			}
462 		} else {
463 			/* Normal case, compression was successful */
464 			size = csbcpb->csb.processed_byte_count;
465 			dev_dbg(dev, "%s: processed_bytes=%d\n",
466 				__func__, size);
467 			hdr->sizes[i] = size;
468 			outbuf += size;
469 			bytesleft -= size;
470 		}
471 
472 		inbuf += max_sync_size;
473 	}
474 
475 	*outlen = (unsigned int)(outbuf - (unsigned long)out);
476 
477 unlock:
478 	if (ret)
479 		nx842_inc_comp_failed(local_devdata);
480 	else {
481 		nx842_inc_comp_complete(local_devdata);
482 		ibm_nx842_incr_hist(local_devdata->counters->comp_times,
483 			(get_tb() - start_time) / tb_ticks_per_usec);
484 	}
485 	rcu_read_unlock();
486 	return ret;
487 }
488 EXPORT_SYMBOL_GPL(nx842_compress);
489 
490 static int sw842_decompress(const unsigned char *, int, unsigned char *, int *,
491 			const void *);
492 
493 /**
494  * nx842_decompress - Decompress data using the 842 algorithm
495  *
496  * Decompression provide by the NX842 coprocessor on IBM Power systems.
497  * The input buffer is decompressed and the result is stored in the
498  * provided output buffer.  The size allocated to the output buffer is
499  * provided by the caller of this function in @outlen.  Upon return from
500  * this function @outlen contains the length of the decompressed data.
501  * If there is an error then @outlen will be 0 and an error will be
502  * specified by the return code from this function.
503  *
504  * @in: Pointer to input buffer, will use bounce buffer if not 128 byte
505  *      aligned
506  * @inlen: Length of input buffer
507  * @out: Pointer to output buffer, must be page aligned
508  * @outlen: Length of output buffer, must be PAGE_SIZE
509  * @wrkmem: ptr to buffer for working memory, size determined by
510  *          nx842_get_workmem_size()
511  *
512  * Returns:
513  *   0		Success, output of length @outlen stored in the buffer at @out
514  *   -ENODEV	Hardware decompression device is unavailable
515  *   -ENOMEM	Unable to allocate internal buffers
516  *   -ENOSPC	Output buffer is to small
517  *   -EINVAL	Bad input data encountered when attempting decompress
518  *   -EIO	Internal error
519  */
520 int nx842_decompress(const unsigned char *in, unsigned int inlen,
521 			 unsigned char *out, unsigned int *outlen, void *wmem)
522 {
523 	struct nx842_header *hdr;
524 	struct nx842_devdata *local_devdata;
525 	struct device *dev = NULL;
526 	struct nx842_workmem *workmem;
527 	struct nx842_scatterlist slin, slout;
528 	struct nx_csbcpb *csbcpb;
529 	int ret = 0, i, size, max_sync_size;
530 	unsigned long inbuf, outbuf;
531 	struct vio_pfo_op op = {
532 		.done = NULL,
533 		.handle = 0,
534 		.timeout = 0,
535 	};
536 	unsigned long start_time = get_tb();
537 
538 	/* Ensure page alignment and size */
539 	outbuf = (unsigned long)out;
540 	if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE)
541 		return -EINVAL;
542 
543 	rcu_read_lock();
544 	local_devdata = rcu_dereference(devdata);
545 	if (local_devdata)
546 		dev = local_devdata->dev;
547 
548 	/* Get header */
549 	hdr = (struct nx842_header *)in;
550 
551 	workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
552 		NX842_HW_PAGE_SIZE);
553 
554 	inbuf = (unsigned long)in + hdr->offset;
555 	if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) {
556 		/* Copy block(s) into bounce buffer for alignment */
557 		memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset);
558 		inbuf = (unsigned long)workmem->bounce;
559 	}
560 
561 	/* Init scatterlist */
562 	slin.entries = (struct nx842_slentry *)workmem->slin;
563 	slout.entries = (struct nx842_slentry *)workmem->slout;
564 
565 	/* Init operation */
566 	op.flags = NX842_OP_DECOMPRESS;
567 	csbcpb = &workmem->csbcpb;
568 	memset(csbcpb, 0, sizeof(*csbcpb));
569 	op.csbcpb = __pa(csbcpb);
570 
571 	/*
572 	 * max_sync_size may have changed since compression,
573 	 * so we can't read it from the device info. We need
574 	 * to derive it from hdr->blocks_nr.
575 	 */
576 	max_sync_size = PAGE_SIZE / hdr->blocks_nr;
577 
578 	for (i = 0; i < hdr->blocks_nr; i++) {
579 		/* Skip padding */
580 		inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN);
581 
582 		if (hdr->sizes[i] < 0) {
583 			/* Negative sizes indicate uncompressed data blocks */
584 			size = abs(hdr->sizes[i]);
585 			memcpy((void *)outbuf, (void *)inbuf, size);
586 			outbuf += size;
587 			inbuf += size;
588 			continue;
589 		}
590 
591 		if (!dev)
592 			goto sw;
593 
594 		/*
595 		 * The better the compression, the more likely the "likely"
596 		 * case becomes.
597 		 */
598 		if (likely((inbuf & NX842_HW_PAGE_MASK) ==
599 			((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) {
600 			/* Create direct DDE */
601 			op.in = __pa(inbuf);
602 			op.inlen = hdr->sizes[i];
603 		} else {
604 			/* Create indirect DDE (scatterlist) */
605 			nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin);
606 			op.in = __pa(slin.entries);
607 			op.inlen = -nx842_get_scatterlist_size(&slin);
608 		}
609 
610 		/*
611 		 * NOTE: If the default max_sync_size is changed from 4k
612 		 * to 64k, remove the "likely" case below, since a
613 		 * scatterlist will always be needed.
614 		 */
615 		if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
616 			/* Create direct DDE */
617 			op.out = __pa(outbuf);
618 			op.outlen = max_sync_size;
619 		} else {
620 			/* Create indirect DDE (scatterlist) */
621 			nx842_build_scatterlist(outbuf, max_sync_size, &slout);
622 			op.out = __pa(slout.entries);
623 			op.outlen = -nx842_get_scatterlist_size(&slout);
624 		}
625 
626 		/* Send request to pHyp */
627 		ret = vio_h_cop_sync(local_devdata->vdev, &op);
628 
629 		/* Check for pHyp error */
630 		if (ret) {
631 			dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
632 				__func__, ret, op.hcall_err);
633 			dev = NULL;
634 			goto sw;
635 		}
636 
637 		/* Check for hardware error */
638 		ret = nx842_validate_result(dev, &csbcpb->csb);
639 		if (ret) {
640 			dev = NULL;
641 			goto sw;
642 		}
643 
644 		/* HW decompression success */
645 		inbuf += hdr->sizes[i];
646 		outbuf += csbcpb->csb.processed_byte_count;
647 		continue;
648 
649 sw:
650 		/* software decompression */
651 		size = max_sync_size;
652 		ret = sw842_decompress(
653 			(unsigned char *)inbuf, hdr->sizes[i],
654 			(unsigned char *)outbuf, &size, wmem);
655 		if (ret)
656 			pr_debug("%s: sw842_decompress failed with %d\n",
657 				__func__, ret);
658 
659 		if (ret) {
660 			if (ret != -ENOSPC && ret != -EINVAL &&
661 					ret != -EMSGSIZE)
662 				ret = -EIO;
663 			goto unlock;
664 		}
665 
666 		/* SW decompression success */
667 		inbuf += hdr->sizes[i];
668 		outbuf += size;
669 	}
670 
671 	*outlen = (unsigned int)(outbuf - (unsigned long)out);
672 
673 unlock:
674 	if (ret)
675 		/* decompress fail */
676 		nx842_inc_decomp_failed(local_devdata);
677 	else {
678 		if (!dev)
679 			/* software decompress */
680 			nx842_inc_swdecomp(local_devdata);
681 		nx842_inc_decomp_complete(local_devdata);
682 		ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
683 			(get_tb() - start_time) / tb_ticks_per_usec);
684 	}
685 
686 	rcu_read_unlock();
687 	return ret;
688 }
689 EXPORT_SYMBOL_GPL(nx842_decompress);
690 
691 /**
692  * nx842_OF_set_defaults -- Set default (disabled) values for devdata
693  *
694  * @devdata - struct nx842_devdata to update
695  *
696  * Returns:
697  *  0 on success
698  *  -ENOENT if @devdata ptr is NULL
699  */
700 static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
701 {
702 	if (devdata) {
703 		devdata->max_sync_size = 0;
704 		devdata->max_sync_sg = 0;
705 		devdata->max_sg_len = 0;
706 		devdata->status = UNAVAILABLE;
707 		return 0;
708 	} else
709 		return -ENOENT;
710 }
711 
712 /**
713  * nx842_OF_upd_status -- Update the device info from OF status prop
714  *
715  * The status property indicates if the accelerator is enabled.  If the
716  * device is in the OF tree it indicates that the hardware is present.
717  * The status field indicates if the device is enabled when the status
718  * is 'okay'.  Otherwise the device driver will be disabled.
719  *
720  * @devdata - struct nx842_devdata to update
721  * @prop - struct property point containing the maxsyncop for the update
722  *
723  * Returns:
724  *  0 - Device is available
725  *  -EINVAL - Device is not available
726  */
727 static int nx842_OF_upd_status(struct nx842_devdata *devdata,
728 					struct property *prop) {
729 	int ret = 0;
730 	const char *status = (const char *)prop->value;
731 
732 	if (!strncmp(status, "okay", (size_t)prop->length)) {
733 		devdata->status = AVAILABLE;
734 	} else {
735 		dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n",
736 				__func__, status);
737 		devdata->status = UNAVAILABLE;
738 	}
739 
740 	return ret;
741 }
742 
743 /**
744  * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop
745  *
746  * Definition of the 'ibm,max-sg-len' OF property:
747  *  This field indicates the maximum byte length of a scatter list
748  *  for the platform facility. It is a single cell encoded as with encode-int.
749  *
750  * Example:
751  *  # od -x ibm,max-sg-len
752  *  0000000 0000 0ff0
753  *
754  *  In this example, the maximum byte length of a scatter list is
755  *  0x0ff0 (4,080).
756  *
757  * @devdata - struct nx842_devdata to update
758  * @prop - struct property point containing the maxsyncop for the update
759  *
760  * Returns:
761  *  0 on success
762  *  -EINVAL on failure
763  */
764 static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
765 					struct property *prop) {
766 	int ret = 0;
767 	const int *maxsglen = prop->value;
768 
769 	if (prop->length != sizeof(*maxsglen)) {
770 		dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__);
771 		dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
772 				prop->length, sizeof(*maxsglen));
773 		ret = -EINVAL;
774 	} else {
775 		devdata->max_sg_len = (unsigned int)min(*maxsglen,
776 				(int)NX842_HW_PAGE_SIZE);
777 	}
778 
779 	return ret;
780 }
781 
782 /**
783  * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop
784  *
785  * Definition of the 'ibm,max-sync-cop' OF property:
786  *  Two series of cells.  The first series of cells represents the maximums
787  *  that can be synchronously compressed. The second series of cells
788  *  represents the maximums that can be synchronously decompressed.
789  *  1. The first cell in each series contains the count of the number of
790  *     data length, scatter list elements pairs that follow – each being
791  *     of the form
792  *    a. One cell data byte length
793  *    b. One cell total number of scatter list elements
794  *
795  * Example:
796  *  # od -x ibm,max-sync-cop
797  *  0000000 0000 0001 0000 1000 0000 01fe 0000 0001
798  *  0000020 0000 1000 0000 01fe
799  *
800  *  In this example, compression supports 0x1000 (4,096) data byte length
801  *  and 0x1fe (510) total scatter list elements.  Decompression supports
802  *  0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list
803  *  elements.
804  *
805  * @devdata - struct nx842_devdata to update
806  * @prop - struct property point containing the maxsyncop for the update
807  *
808  * Returns:
809  *  0 on success
810  *  -EINVAL on failure
811  */
812 static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
813 					struct property *prop) {
814 	int ret = 0;
815 	const struct maxsynccop_t {
816 		int comp_elements;
817 		int comp_data_limit;
818 		int comp_sg_limit;
819 		int decomp_elements;
820 		int decomp_data_limit;
821 		int decomp_sg_limit;
822 	} *maxsynccop;
823 
824 	if (prop->length != sizeof(*maxsynccop)) {
825 		dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__);
826 		dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length,
827 				sizeof(*maxsynccop));
828 		ret = -EINVAL;
829 		goto out;
830 	}
831 
832 	maxsynccop = (const struct maxsynccop_t *)prop->value;
833 
834 	/* Use one limit rather than separate limits for compression and
835 	 * decompression. Set a maximum for this so as not to exceed the
836 	 * size that the header can support and round the value down to
837 	 * the hardware page size (4K) */
838 	devdata->max_sync_size =
839 			(unsigned int)min(maxsynccop->comp_data_limit,
840 					maxsynccop->decomp_data_limit);
841 
842 	devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
843 					SIZE_64K);
844 
845 	if (devdata->max_sync_size < SIZE_4K) {
846 		dev_err(devdata->dev, "%s: hardware max data size (%u) is "
847 				"less than the driver minimum, unable to use "
848 				"the hardware device\n",
849 				__func__, devdata->max_sync_size);
850 		ret = -EINVAL;
851 		goto out;
852 	}
853 
854 	devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit,
855 						maxsynccop->decomp_sg_limit);
856 	if (devdata->max_sync_sg < 1) {
857 		dev_err(devdata->dev, "%s: hardware max sg size (%u) is "
858 				"less than the driver minimum, unable to use "
859 				"the hardware device\n",
860 				__func__, devdata->max_sync_sg);
861 		ret = -EINVAL;
862 		goto out;
863 	}
864 
865 out:
866 	return ret;
867 }
868 
869 /**
870  *
871  * nx842_OF_upd -- Handle OF properties updates for the device.
872  *
873  * Set all properties from the OF tree.  Optionally, a new property
874  * can be provided by the @new_prop pointer to overwrite an existing value.
875  * The device will remain disabled until all values are valid, this function
876  * will return an error for updates unless all values are valid.
877  *
878  * @new_prop: If not NULL, this property is being updated.  If NULL, update
879  *  all properties from the current values in the OF tree.
880  *
881  * Returns:
882  *  0 - Success
883  *  -ENOMEM - Could not allocate memory for new devdata structure
884  *  -EINVAL - property value not found, new_prop is not a recognized
885  *	property for the device or property value is not valid.
886  *  -ENODEV - Device is not available
887  */
888 static int nx842_OF_upd(struct property *new_prop)
889 {
890 	struct nx842_devdata *old_devdata = NULL;
891 	struct nx842_devdata *new_devdata = NULL;
892 	struct device_node *of_node = NULL;
893 	struct property *status = NULL;
894 	struct property *maxsglen = NULL;
895 	struct property *maxsyncop = NULL;
896 	int ret = 0;
897 	unsigned long flags;
898 
899 	spin_lock_irqsave(&devdata_mutex, flags);
900 	old_devdata = rcu_dereference_check(devdata,
901 			lockdep_is_held(&devdata_mutex));
902 	if (old_devdata)
903 		of_node = old_devdata->dev->of_node;
904 
905 	if (!old_devdata || !of_node) {
906 		pr_err("%s: device is not available\n", __func__);
907 		spin_unlock_irqrestore(&devdata_mutex, flags);
908 		return -ENODEV;
909 	}
910 
911 	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
912 	if (!new_devdata) {
913 		dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__);
914 		ret = -ENOMEM;
915 		goto error_out;
916 	}
917 
918 	memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
919 	new_devdata->counters = old_devdata->counters;
920 
921 	/* Set ptrs for existing properties */
922 	status = of_find_property(of_node, "status", NULL);
923 	maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
924 	maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
925 	if (!status || !maxsglen || !maxsyncop) {
926 		dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
927 		ret = -EINVAL;
928 		goto error_out;
929 	}
930 
931 	/* Set ptr to new property if provided */
932 	if (new_prop) {
933 		/* Single property */
934 		if (!strncmp(new_prop->name, "status", new_prop->length)) {
935 			status = new_prop;
936 
937 		} else if (!strncmp(new_prop->name, "ibm,max-sg-len",
938 					new_prop->length)) {
939 			maxsglen = new_prop;
940 
941 		} else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
942 					new_prop->length)) {
943 			maxsyncop = new_prop;
944 
945 		} else {
946 			/*
947 			 * Skip the update, the property being updated
948 			 * has no impact.
949 			 */
950 			goto out;
951 		}
952 	}
953 
954 	/* Perform property updates */
955 	ret = nx842_OF_upd_status(new_devdata, status);
956 	if (ret)
957 		goto error_out;
958 
959 	ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
960 	if (ret)
961 		goto error_out;
962 
963 	ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
964 	if (ret)
965 		goto error_out;
966 
967 out:
968 	dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
969 			__func__, new_devdata->max_sync_size,
970 			old_devdata->max_sync_size);
971 	dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
972 			__func__, new_devdata->max_sync_sg,
973 			old_devdata->max_sync_sg);
974 	dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
975 			__func__, new_devdata->max_sg_len,
976 			old_devdata->max_sg_len);
977 
978 	rcu_assign_pointer(devdata, new_devdata);
979 	spin_unlock_irqrestore(&devdata_mutex, flags);
980 	synchronize_rcu();
981 	dev_set_drvdata(new_devdata->dev, new_devdata);
982 	kfree(old_devdata);
983 	return 0;
984 
985 error_out:
986 	if (new_devdata) {
987 		dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
988 		nx842_OF_set_defaults(new_devdata);
989 		rcu_assign_pointer(devdata, new_devdata);
990 		spin_unlock_irqrestore(&devdata_mutex, flags);
991 		synchronize_rcu();
992 		dev_set_drvdata(new_devdata->dev, new_devdata);
993 		kfree(old_devdata);
994 	} else {
995 		dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
996 		spin_unlock_irqrestore(&devdata_mutex, flags);
997 	}
998 
999 	if (!ret)
1000 		ret = -EINVAL;
1001 	return ret;
1002 }
1003 
1004 /**
1005  * nx842_OF_notifier - Process updates to OF properties for the device
1006  *
1007  * @np: notifier block
1008  * @action: notifier action
1009  * @update: struct pSeries_reconfig_prop_update pointer if action is
1010  *	PSERIES_UPDATE_PROPERTY
1011  *
1012  * Returns:
1013  *	NOTIFY_OK on success
1014  *	NOTIFY_BAD encoded with error number on failure, use
1015  *		notifier_to_errno() to decode this value
1016  */
1017 static int nx842_OF_notifier(struct notifier_block *np,
1018 					unsigned long action,
1019 					void *update)
1020 {
1021 	struct pSeries_reconfig_prop_update *upd;
1022 	struct nx842_devdata *local_devdata;
1023 	struct device_node *node = NULL;
1024 
1025 	upd = (struct pSeries_reconfig_prop_update *)update;
1026 
1027 	rcu_read_lock();
1028 	local_devdata = rcu_dereference(devdata);
1029 	if (local_devdata)
1030 		node = local_devdata->dev->of_node;
1031 
1032 	if (local_devdata &&
1033 			action == PSERIES_UPDATE_PROPERTY &&
1034 			!strcmp(upd->node->name, node->name)) {
1035 		rcu_read_unlock();
1036 		nx842_OF_upd(upd->property);
1037 	} else
1038 		rcu_read_unlock();
1039 
1040 	return NOTIFY_OK;
1041 }
1042 
1043 static struct notifier_block nx842_of_nb = {
1044 	.notifier_call = nx842_OF_notifier,
1045 };
1046 
/*
 * nx842_counter_read - generate the sysfs show routine for one counter
 *
 * Expands to nx842_<_name>_show(), which prints the atomic64 counter
 * <_name> of the currently published device data followed by a newline
 * and returns the snprintf() result.  When no device data is published
 * nothing is written and 0 is returned.
 */
#define nx842_counter_read(_name)					\
static ssize_t nx842_##_name##_show(struct device *dev,		\
		struct device_attribute *attr,				\
		char *buf)						\
{									\
	struct nx842_devdata *cur;					\
	int len;							\
									\
	rcu_read_lock();						\
	cur = rcu_dereference(devdata);					\
	len = cur ? snprintf(buf, PAGE_SIZE, "%ld\n",			\
			atomic64_read(&cur->counters->_name)) : 0;	\
	rcu_read_unlock();						\
	return len;							\
}
1061 
1062 #define NX842DEV_COUNTER_ATTR_RO(_name)					\
1063 	nx842_counter_read(_name);					\
1064 	static struct device_attribute dev_attr_##_name = __ATTR(_name,	\
1065 						0444,			\
1066 						nx842_##_name##_show,\
1067 						NULL);
1068 
1069 NX842DEV_COUNTER_ATTR_RO(comp_complete);
1070 NX842DEV_COUNTER_ATTR_RO(comp_failed);
1071 NX842DEV_COUNTER_ATTR_RO(decomp_complete);
1072 NX842DEV_COUNTER_ATTR_RO(decomp_failed);
1073 NX842DEV_COUNTER_ATTR_RO(swdecomp);
1074 
1075 static ssize_t nx842_timehist_show(struct device *,
1076 		struct device_attribute *, char *);
1077 
1078 static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444,
1079 		nx842_timehist_show, NULL);
1080 static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times,
1081 		0444, nx842_timehist_show, NULL);
1082 
1083 static ssize_t nx842_timehist_show(struct device *dev,
1084 		struct device_attribute *attr, char *buf) {
1085 	char *p = buf;
1086 	struct nx842_devdata *local_devdata;
1087 	atomic64_t *times;
1088 	int bytes_remain = PAGE_SIZE;
1089 	int bytes;
1090 	int i;
1091 
1092 	rcu_read_lock();
1093 	local_devdata = rcu_dereference(devdata);
1094 	if (!local_devdata) {
1095 		rcu_read_unlock();
1096 		return 0;
1097 	}
1098 
1099 	if (attr == &dev_attr_comp_times)
1100 		times = local_devdata->counters->comp_times;
1101 	else if (attr == &dev_attr_decomp_times)
1102 		times = local_devdata->counters->decomp_times;
1103 	else {
1104 		rcu_read_unlock();
1105 		return 0;
1106 	}
1107 
1108 	for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
1109 		bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n",
1110 			       i ? (2<<(i-1)) : 0, (2<<i)-1,
1111 			       atomic64_read(&times[i]));
1112 		bytes_remain -= bytes;
1113 		p += bytes;
1114 	}
1115 	/* The last bucket holds everything over
1116 	 * 2<<(NX842_HIST_SLOTS - 2) us */
1117 	bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n",
1118 			2<<(NX842_HIST_SLOTS - 2),
1119 			atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
1120 	p += bytes;
1121 
1122 	rcu_read_unlock();
1123 	return p - buf;
1124 }
1125 
1126 static struct attribute *nx842_sysfs_entries[] = {
1127 	&dev_attr_comp_complete.attr,
1128 	&dev_attr_comp_failed.attr,
1129 	&dev_attr_decomp_complete.attr,
1130 	&dev_attr_decomp_failed.attr,
1131 	&dev_attr_swdecomp.attr,
1132 	&dev_attr_comp_times.attr,
1133 	&dev_attr_decomp_times.attr,
1134 	NULL,
1135 };
1136 
1137 static struct attribute_group nx842_attribute_group = {
1138 	.name = NULL,		/* put in device directory */
1139 	.attrs = nx842_sysfs_entries,
1140 };
1141 
1142 static int __init nx842_probe(struct vio_dev *viodev,
1143 				  const struct vio_device_id *id)
1144 {
1145 	struct nx842_devdata *old_devdata, *new_devdata = NULL;
1146 	unsigned long flags;
1147 	int ret = 0;
1148 
1149 	spin_lock_irqsave(&devdata_mutex, flags);
1150 	old_devdata = rcu_dereference_check(devdata,
1151 			lockdep_is_held(&devdata_mutex));
1152 
1153 	if (old_devdata && old_devdata->vdev != NULL) {
1154 		dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
1155 		ret = -1;
1156 		goto error_unlock;
1157 	}
1158 
1159 	dev_set_drvdata(&viodev->dev, NULL);
1160 
1161 	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
1162 	if (!new_devdata) {
1163 		dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__);
1164 		ret = -ENOMEM;
1165 		goto error_unlock;
1166 	}
1167 
1168 	new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
1169 			GFP_NOFS);
1170 	if (!new_devdata->counters) {
1171 		dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__);
1172 		ret = -ENOMEM;
1173 		goto error_unlock;
1174 	}
1175 
1176 	new_devdata->vdev = viodev;
1177 	new_devdata->dev = &viodev->dev;
1178 	nx842_OF_set_defaults(new_devdata);
1179 
1180 	rcu_assign_pointer(devdata, new_devdata);
1181 	spin_unlock_irqrestore(&devdata_mutex, flags);
1182 	synchronize_rcu();
1183 	kfree(old_devdata);
1184 
1185 	pSeries_reconfig_notifier_register(&nx842_of_nb);
1186 
1187 	ret = nx842_OF_upd(NULL);
1188 	if (ret && ret != -ENODEV) {
1189 		dev_err(&viodev->dev, "could not parse device tree. %d\n", ret);
1190 		ret = -1;
1191 		goto error;
1192 	}
1193 
1194 	rcu_read_lock();
1195 	if (dev_set_drvdata(&viodev->dev, rcu_dereference(devdata))) {
1196 		rcu_read_unlock();
1197 		dev_err(&viodev->dev, "failed to set driver data for device\n");
1198 		ret = -1;
1199 		goto error;
1200 	}
1201 	rcu_read_unlock();
1202 
1203 	if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
1204 		dev_err(&viodev->dev, "could not create sysfs device attributes\n");
1205 		ret = -1;
1206 		goto error;
1207 	}
1208 
1209 	return 0;
1210 
1211 error_unlock:
1212 	spin_unlock_irqrestore(&devdata_mutex, flags);
1213 	if (new_devdata)
1214 		kfree(new_devdata->counters);
1215 	kfree(new_devdata);
1216 error:
1217 	return ret;
1218 }
1219 
1220 static int __exit nx842_remove(struct vio_dev *viodev)
1221 {
1222 	struct nx842_devdata *old_devdata;
1223 	unsigned long flags;
1224 
1225 	pr_info("Removing IBM Power 842 compression device\n");
1226 	sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
1227 
1228 	spin_lock_irqsave(&devdata_mutex, flags);
1229 	old_devdata = rcu_dereference_check(devdata,
1230 			lockdep_is_held(&devdata_mutex));
1231 	pSeries_reconfig_notifier_unregister(&nx842_of_nb);
1232 	rcu_assign_pointer(devdata, NULL);
1233 	spin_unlock_irqrestore(&devdata_mutex, flags);
1234 	synchronize_rcu();
1235 	dev_set_drvdata(&viodev->dev, NULL);
1236 	if (old_devdata)
1237 		kfree(old_devdata->counters);
1238 	kfree(old_devdata);
1239 	return 0;
1240 }
1241 
1242 static struct vio_device_id nx842_driver_ids[] = {
1243 	{"ibm,compression-v1", "ibm,compression"},
1244 	{"", ""},
1245 };
1246 
1247 static struct vio_driver nx842_driver = {
1248 	.name = MODULE_NAME,
1249 	.probe = nx842_probe,
1250 	.remove = nx842_remove,
1251 	.get_desired_dma = nx842_get_desired_dma,
1252 	.id_table = nx842_driver_ids,
1253 };
1254 
1255 static int __init nx842_init(void)
1256 {
1257 	struct nx842_devdata *new_devdata;
1258 	pr_info("Registering IBM Power 842 compression driver\n");
1259 
1260 	RCU_INIT_POINTER(devdata, NULL);
1261 	new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
1262 	if (!new_devdata) {
1263 		pr_err("Could not allocate memory for device data\n");
1264 		return -ENOMEM;
1265 	}
1266 	new_devdata->status = UNAVAILABLE;
1267 	RCU_INIT_POINTER(devdata, new_devdata);
1268 
1269 	return vio_register_driver(&nx842_driver);
1270 }
1271 
1272 module_init(nx842_init);
1273 
1274 static void __exit nx842_exit(void)
1275 {
1276 	struct nx842_devdata *old_devdata;
1277 	unsigned long flags;
1278 
1279 	pr_info("Exiting IBM Power 842 compression driver\n");
1280 	spin_lock_irqsave(&devdata_mutex, flags);
1281 	old_devdata = rcu_dereference_check(devdata,
1282 			lockdep_is_held(&devdata_mutex));
1283 	rcu_assign_pointer(devdata, NULL);
1284 	spin_unlock_irqrestore(&devdata_mutex, flags);
1285 	synchronize_rcu();
1286 	if (old_devdata)
1287 		dev_set_drvdata(old_devdata->dev, NULL);
1288 	kfree(old_devdata);
1289 	vio_unregister_driver(&nx842_driver);
1290 }
1291 
1292 module_exit(nx842_exit);
1293 
1294 /*********************************
1295  * 842 software decompressor
1296 *********************************/
/*
 * A template op reads from the input cursor (*inbuf, *inbit), writes to
 * *outbuf and advances the cursors it used.  A non-zero return value is
 * treated as a failure by sw842_decompress().
 */
typedef int (*sw842_template_op)(const char **inbuf, int *inbit,
				 unsigned char **outbuf,
				 struct sw842_fifo *fifo);

/* Ops that copy literal bytes from the input */
static int sw842_data8(const char **inbuf, int *inbit,
		       unsigned char **outbuf, struct sw842_fifo *fifo);
static int sw842_data4(const char **inbuf, int *inbit,
		       unsigned char **outbuf, struct sw842_fifo *fifo);
static int sw842_data2(const char **inbuf, int *inbit,
		       unsigned char **outbuf, struct sw842_fifo *fifo);

/* Ops that copy bytes from a fifo slot named by an index in the input */
static int sw842_ptr8(const char **inbuf, int *inbit,
		      unsigned char **outbuf, struct sw842_fifo *fifo);
static int sw842_ptr4(const char **inbuf, int *inbit,
		      unsigned char **outbuf, struct sw842_fifo *fifo);
static int sw842_ptr2(const char **inbuf, int *inbit,
		      unsigned char **outbuf, struct sw842_fifo *fifo);
1312 
/*
 * Special templates: sw842_decompress() handles these itself instead of
 * dispatching through sw842_tmpl_ops.
 */
#define SW842_TMPL_REPEAT	0x1B	/* repeat the previous output block */
#define SW842_TMPL_ZEROS	0x1C	/* emit eight zero bytes */
#define SW842_TMPL_EOF		0x1E	/* end of the compressed stream */
1317 
1318 static sw842_template_op sw842_tmpl_ops[26][4] = {
1319 	{ sw842_data8, NULL}, /* 0 (00000) */
1320 	{ sw842_data4, sw842_data2, sw842_ptr2,  NULL},
1321 	{ sw842_data4, sw842_ptr2,  sw842_data2, NULL},
1322 	{ sw842_data4, sw842_ptr2,  sw842_ptr2,  NULL},
1323 	{ sw842_data4, sw842_ptr4,  NULL},
1324 	{ sw842_data2, sw842_ptr2,  sw842_data4, NULL},
1325 	{ sw842_data2, sw842_ptr2,  sw842_data2, sw842_ptr2},
1326 	{ sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_data2},
1327 	{ sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_ptr2,},
1328 	{ sw842_data2, sw842_ptr2,  sw842_ptr4,  NULL},
1329 	{ sw842_ptr2,  sw842_data2, sw842_data4, NULL}, /* 10 (01010) */
1330 	{ sw842_ptr2,  sw842_data4, sw842_ptr2,  NULL},
1331 	{ sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_data2},
1332 	{ sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_ptr2},
1333 	{ sw842_ptr2,  sw842_data2, sw842_ptr4,  NULL},
1334 	{ sw842_ptr2,  sw842_ptr2,  sw842_data4, NULL},
1335 	{ sw842_ptr2,  sw842_ptr2,  sw842_data2, sw842_ptr2},
1336 	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_data2},
1337 	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_ptr2},
1338 	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr4,  NULL},
1339 	{ sw842_ptr4,  sw842_data4, NULL}, /* 20 (10100) */
1340 	{ sw842_ptr4,  sw842_data2, sw842_ptr2,  NULL},
1341 	{ sw842_ptr4,  sw842_ptr2,  sw842_data2, NULL},
1342 	{ sw842_ptr4,  sw842_ptr2,  sw842_ptr2,  NULL},
1343 	{ sw842_ptr4,  sw842_ptr4,  NULL},
1344 	{ sw842_ptr8,  NULL}
1345 };
1346 
1347 /* Software decompress helpers */
1348 
/*
 * sw842_get_byte - fetch the eight bits starting @bit bits into @buf
 *
 * @buf: current position in the input stream
 * @bit: bit offset within buf[0], counted from the most significant bit
 *
 * The bytes are combined most significant first, so the result does not
 * depend on host byte order.  buf[1] is only read when the requested
 * bits actually extend into it (@bit != 0); a byte aligned read at the
 * last byte of a buffer therefore stays inside the buffer.  Reading the
 * bytes individually also avoids an unaligned 16-bit load through a
 * casted pointer.
 *
 * Returns the extracted byte.
 */
static uint8_t sw842_get_byte(const char *buf, int bit)
{
	const unsigned char *bytes = (const unsigned char *)buf;
	unsigned int window = (unsigned int)bytes[0] << 8;

	if (bit)
		window |= bytes[1];

	/* After the shift the wanted bits sit in bits 15..8 of the window */
	return (uint8_t)((window << bit) >> 8);
}
1359 
/*
 * Read the 5-bit template number at the input cursor and advance the
 * cursor (*buf, *bit) by five bits.
 */
static uint8_t sw842_get_template(const char **buf, int *bit)
{
	const int next = *bit + 5;
	uint8_t tmpl = (sw842_get_byte(*buf, *bit) >> 3) & 0x1F;

	*buf += next / 8;
	*bit = next % 8;
	return tmpl;
}
1370 
/*
 * Read the 6-bit repeat count at the input cursor and advance the cursor
 * (*buf, *bit) by six bits.
 */
static uint8_t sw842_get_repeat_count(const char **buf, int *bit)
{
	const int next = *bit + 6;
	uint8_t count = (sw842_get_byte(*buf, *bit) >> 2) & 0x3F;

	*buf += next / 8;
	*bit = next % 8;
	return count;
}
1382 
/*
 * Read an 8-bit fifo index at the input cursor.  The cursor moves on by
 * one whole byte, so *bit is left unchanged.
 */
static uint8_t sw842_get_ptr2(const char **buf, int *bit)
{
	const char *at = *buf;

	*buf = at + 1;
	return sw842_get_byte(at, *bit);
}
1390 
/*
 * Read a 9-bit fifo index at the input cursor and advance the cursor
 * (*buf, *bit) by nine bits.  @fifo is not used.
 */
static uint16_t sw842_get_ptr4(const char **buf, int *bit,
		struct sw842_fifo *fifo)
{
	const unsigned char *bytes = (const unsigned char *)*buf;
	const int next = *bit + 9;
	/* two input bytes, most significant first */
	uint16_t window = (uint16_t)((bytes[0] << 8) | bytes[1]);

	/* drop the bits already consumed, keep the nine that follow */
	window = (uint16_t)(window << *bit);

	*buf += next / 8;
	*bit = next % 8;
	return (window >> 7) & 0x01FF;
}
1403 
/*
 * Read the fifo index for an 8-byte entry; it is encoded the same way as
 * the 2-byte index, so this defers to sw842_get_ptr2().  @fifo is not
 * used.
 */
static uint8_t sw842_get_ptr8(const char **buf, int *bit,
		struct sw842_fifo *fifo)
{
	uint8_t idx = sw842_get_ptr2(buf, bit);

	return idx;
}
1409 
1410 /* Software decompress template ops */
1411 
/*
 * Copy eight literal bytes from the input to the output, as two 4-byte
 * copies.  Returns the first non-zero result of sw842_data4(), else 0.
 */
static int sw842_data8(const char **inbuf, int *inbit,
		unsigned char **outbuf, struct sw842_fifo *fifo)
{
	int ret = sw842_data4(inbuf, inbit, outbuf, fifo);

	return ret ? ret : sw842_data4(inbuf, inbit, outbuf, fifo);
}
1423 
/*
 * Copy four literal bytes from the input to the output, as two 2-byte
 * copies.  Returns the first non-zero result of sw842_data2(), else 0.
 */
static int sw842_data4(const char **inbuf, int *inbit,
		unsigned char **outbuf, struct sw842_fifo *fifo)
{
	int ret = sw842_data2(inbuf, inbit, outbuf, fifo);

	return ret ? ret : sw842_data2(inbuf, inbit, outbuf, fifo);
}
1435 
/*
 * Copy two literal bytes from the input to the output.  Each byte is read
 * at the current bit offset; both cursors advance by one byte per copy
 * while *inbit stays as it is.  Always returns 0.
 */
static int sw842_data2(const char **inbuf, int *inbit,
		unsigned char **outbuf, struct sw842_fifo *fifo)
{
	int n;

	for (n = 0; n < 2; n++) {
		**outbuf = sw842_get_byte(*inbuf, *inbit);
		*inbuf += 1;
		*outbuf += 1;
	}

	return 0;
}
1447 
1448 static int sw842_ptr8(const char **inbuf, int *inbit,
1449 		unsigned char **outbuf, struct sw842_fifo *fifo)
1450 {
1451 	uint8_t ptr;
1452 	ptr = sw842_get_ptr8(inbuf, inbit, fifo);
1453 	if (!fifo->f84_full && (ptr >= fifo->f8_count))
1454 		return 1;
1455 	memcpy(*outbuf, fifo->f8[ptr], 8);
1456 	*outbuf += 8;
1457 	return 0;
1458 }
1459 
1460 static int sw842_ptr4(const char **inbuf, int *inbit,
1461 		unsigned char **outbuf, struct sw842_fifo *fifo)
1462 {
1463 	uint16_t ptr;
1464 	ptr = sw842_get_ptr4(inbuf, inbit, fifo);
1465 	if (!fifo->f84_full && (ptr >= fifo->f4_count))
1466 		return 1;
1467 	memcpy(*outbuf, fifo->f4[ptr], 4);
1468 	*outbuf += 4;
1469 	return 0;
1470 }
1471 
1472 static int sw842_ptr2(const char **inbuf, int *inbit,
1473 		unsigned char **outbuf, struct sw842_fifo *fifo)
1474 {
1475 	uint8_t ptr;
1476 	ptr = sw842_get_ptr2(inbuf, inbit);
1477 	if (!fifo->f2_full && (ptr >= fifo->f2_count))
1478 		return 1;
1479 	memcpy(*outbuf, fifo->f2[ptr], 2);
1480 	*outbuf += 2;
1481 	return 0;
1482 }
1483 
1484 static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo)
1485 {
1486 	unsigned char initial_f2count = fifo->f2_count;
1487 
1488 	memcpy(fifo->f8[fifo->f8_count], buf, 8);
1489 	fifo->f4_count += 2;
1490 	fifo->f8_count += 1;
1491 
1492 	if (!fifo->f84_full && fifo->f4_count >= 512) {
1493 		fifo->f84_full = 1;
1494 		fifo->f4_count /= 512;
1495 	}
1496 
1497 	memcpy(fifo->f2[fifo->f2_count++], buf, 2);
1498 	memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2);
1499 	memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2);
1500 	memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2);
1501 	if (fifo->f2_count < initial_f2count)
1502 		fifo->f2_full = 1;
1503 }
1504 
1505 static int sw842_decompress(const unsigned char *src, int srclen,
1506 			unsigned char *dst, int *destlen,
1507 			const void *wrkmem)
1508 {
1509 	uint8_t tmpl;
1510 	const char *inbuf;
1511 	int inbit = 0;
1512 	unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf;
1513 	const char *inbuf_end;
1514 	sw842_template_op op;
1515 	int opindex;
1516 	int i, repeat_count;
1517 	struct sw842_fifo *fifo;
1518 	int ret = 0;
1519 
1520 	fifo = &((struct nx842_workmem *)(wrkmem))->swfifo;
1521 	memset(fifo, 0, sizeof(*fifo));
1522 
1523 	origbuf = NULL;
1524 	inbuf = src;
1525 	inbuf_end = src + srclen;
1526 	outbuf = dst;
1527 	outbuf_end = dst + *destlen;
1528 
1529 	while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) {
1530 		if (inbuf >= inbuf_end) {
1531 			ret = -EINVAL;
1532 			goto out;
1533 		}
1534 
1535 		opindex = 0;
1536 		prevbuf = origbuf;
1537 		origbuf = outbuf;
1538 		switch (tmpl) {
1539 		case SW842_TMPL_REPEAT:
1540 			if (prevbuf == NULL) {
1541 				ret = -EINVAL;
1542 				goto out;
1543 			}
1544 
1545 			repeat_count = sw842_get_repeat_count(&inbuf,
1546 								&inbit) + 1;
1547 
1548 			/* Did the repeat count advance past the end of input */
1549 			if (inbuf > inbuf_end) {
1550 				ret = -EINVAL;
1551 				goto out;
1552 			}
1553 
1554 			for (i = 0; i < repeat_count; i++) {
1555 				/* Would this overflow the output buffer */
1556 				if ((outbuf + 8) > outbuf_end) {
1557 					ret = -ENOSPC;
1558 					goto out;
1559 				}
1560 
1561 				memcpy(outbuf, prevbuf, 8);
1562 				sw842_copy_to_fifo(outbuf, fifo);
1563 				outbuf += 8;
1564 			}
1565 			break;
1566 
1567 		case SW842_TMPL_ZEROS:
1568 			/* Would this overflow the output buffer */
1569 			if ((outbuf + 8) > outbuf_end) {
1570 				ret = -ENOSPC;
1571 				goto out;
1572 			}
1573 
1574 			memset(outbuf, 0, 8);
1575 			sw842_copy_to_fifo(outbuf, fifo);
1576 			outbuf += 8;
1577 			break;
1578 
1579 		default:
1580 			if (tmpl > 25) {
1581 				ret = -EINVAL;
1582 				goto out;
1583 			}
1584 
1585 			/* Does this go past the end of the input buffer */
1586 			if ((inbuf + 2) > inbuf_end) {
1587 				ret = -EINVAL;
1588 				goto out;
1589 			}
1590 
1591 			/* Would this overflow the output buffer */
1592 			if ((outbuf + 8) > outbuf_end) {
1593 				ret = -ENOSPC;
1594 				goto out;
1595 			}
1596 
1597 			while (opindex < 4 &&
1598 				(op = sw842_tmpl_ops[tmpl][opindex++])
1599 					!= NULL) {
1600 				ret = (*op)(&inbuf, &inbit, &outbuf, fifo);
1601 				if (ret) {
1602 					ret = -EINVAL;
1603 					goto out;
1604 				}
1605 				sw842_copy_to_fifo(origbuf, fifo);
1606 			}
1607 		}
1608 	}
1609 
1610 out:
1611 	if (!ret)
1612 		*destlen = (unsigned int)(outbuf - dst);
1613 	else
1614 		*destlen = 0;
1615 
1616 	return ret;
1617 }
1618