1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Driver for IBM PowerNV compression accelerator
4 *
5 * Copyright (C) 2015 Dan Streetman, IBM Corp
6 */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include "nx-842.h"
11
12 #include <linux/timer.h>
13
14 #include <asm/prom.h>
15 #include <asm/icswx.h>
16 #include <asm/vas.h>
17 #include <asm/reg.h>
18 #include <asm/opal-api.h>
19 #include <asm/opal.h>
20
21 MODULE_LICENSE("GPL");
22 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
23 MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors");
24 MODULE_ALIAS_CRYPTO("842");
25 MODULE_ALIAS_CRYPTO("842-nx");
26
27 #define WORKMEM_ALIGN (CRB_ALIGN)
28 #define CSB_WAIT_MAX (5000) /* ms */
29 #define VAS_RETRIES (10)
30
/*
 * Per-request working memory. It holds the CRB and the input/output DDLs
 * whose physical addresses are handed to the coprocessor, plus the time
 * the request was submitted (used for the completion timeout).
 */
struct nx842_workmem {
	/* Below fields must be properly aligned */
	struct coprocessor_request_block crb; /* CRB_ALIGN align */
	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
	/* Above fields must be properly aligned */

	/* set by the exec functions right before the request is submitted */
	ktime_t start;

	/*
	 * The exec functions PTR_ALIGN() the caller's buffer up to
	 * WORKMEM_ALIGN; this spare room keeps the aligned struct inside
	 * the caller's allocation.
	 */
	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
} __packed __aligned(WORKMEM_ALIGN);
42
/*
 * One NX coprocessor found in the device tree. Instances are linked on
 * the nx_coprocs list and freed by nx_delete_coprocs().
 */
struct nx_coproc {
	unsigned int chip_id; /* chip this coprocessor was found on */
	unsigned int ct; /* Can be 842 or GZIP high/normal*/
	unsigned int ci; /* Coprocessor instance, used with icswx */
	struct {
		struct vas_window *rxwin; /* VAS receive window (VAS path only) */
		int id; /* VAS id passed to vas_tx_win_open() */
	} vas;
	struct list_head list; /* entry in nx_coprocs */
};
53
/*
 * Per-CPU VAS send window. A request is sent to the NX engine on the chip
 * of the CPU the caller is running on. Used only by the VAS exec path.
 */
static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);

/* no cpu hotplug on powernv, so this list never changes after init */
static LIST_HEAD(nx_coprocs);

/*
 * Coprocessor type placed in the CCW by the icswx path. It stays 0 when
 * the VAS path is in use, which is how module exit tells the two apart.
 */
static unsigned int nx842_ct; /* used in icswx function */
63
/*
 * Coprocessor type values. These are the same values skiboot uses and
 * match the coprocessor type numbering in the NX workbook.
 */
#define NX_CT_GZIP (2) /* on P9 and later */
#define NX_CT_842 (3)
70
/*
 * Backend called by nx842_powernv_compress()/nx842_powernv_decompress().
 * Module init points it at nx842_exec_icswx() or nx842_exec_vas().
 */
static int (*nx842_powernv_exec)(const unsigned char *in,
				unsigned int inlen, unsigned char *out,
				unsigned int *outlenp, void *workmem, int fc);
74
75 /*
76 * setup_indirect_dde - Setup an indirect DDE
77 *
78 * The DDE is setup with the DDE count, byte count, and address of
79 * first direct DDE in the list.
80 */
setup_indirect_dde(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned int dde_count,unsigned int byte_count)81 static void setup_indirect_dde(struct data_descriptor_entry *dde,
82 struct data_descriptor_entry *ddl,
83 unsigned int dde_count, unsigned int byte_count)
84 {
85 dde->flags = 0;
86 dde->count = dde_count;
87 dde->index = 0;
88 dde->length = cpu_to_be32(byte_count);
89 dde->address = cpu_to_be64(nx842_get_pa(ddl));
90 }
91
92 /*
93 * setup_direct_dde - Setup single DDE from buffer
94 *
95 * The DDE is setup with the buffer and length. The buffer must be properly
96 * aligned. The used length is returned.
97 * Returns:
98 * N Successfully set up DDE with N bytes
99 */
setup_direct_dde(struct data_descriptor_entry * dde,unsigned long pa,unsigned int len)100 static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
101 unsigned long pa, unsigned int len)
102 {
103 unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
104
105 dde->flags = 0;
106 dde->count = 0;
107 dde->index = 0;
108 dde->length = cpu_to_be32(l);
109 dde->address = cpu_to_be64(pa);
110
111 return l;
112 }
113
114 /*
115 * setup_ddl - Setup DDL from buffer
116 *
117 * Returns:
118 * 0 Successfully set up DDL
119 */
setup_ddl(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned char * buf,unsigned int len,bool in)120 static int setup_ddl(struct data_descriptor_entry *dde,
121 struct data_descriptor_entry *ddl,
122 unsigned char *buf, unsigned int len,
123 bool in)
124 {
125 unsigned long pa = nx842_get_pa(buf);
126 int i, ret, total_len = len;
127
128 if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
129 pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
130 in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
131 return -EINVAL;
132 }
133
134 /* only need to check last mult; since buffer must be
135 * DDE_BUFFER_ALIGN aligned, and that is a multiple of
136 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers
137 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT.
138 */
139 if (len % DDE_BUFFER_LAST_MULT) {
140 pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
141 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
142 if (in)
143 return -EINVAL;
144 len = round_down(len, DDE_BUFFER_LAST_MULT);
145 }
146
147 /* use a single direct DDE */
148 if (len <= LEN_ON_PAGE(pa)) {
149 ret = setup_direct_dde(dde, pa, len);
150 WARN_ON(ret < len);
151 return 0;
152 }
153
154 /* use the DDL */
155 for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
156 ret = setup_direct_dde(&ddl[i], pa, len);
157 buf += ret;
158 len -= ret;
159 pa = nx842_get_pa(buf);
160 }
161
162 if (len > 0) {
163 pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
164 total_len, in ? "input" : "output", len);
165 if (in)
166 return -EMSGSIZE;
167 total_len -= len;
168 }
169 setup_indirect_dde(dde, ddl, i, total_len);
170
171 return 0;
172 }
173
/*
 * Log an error followed by the CSB's flags, cs, cc, ce and count fields.
 * @msg must be a string literal because it is pasted into the format
 * string; any arguments for @msg are passed before the CSB fields.
 */
#define CSB_ERR(csb, msg, ...)					\
	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",	\
	       ##__VA_ARGS__, (csb)->flags,			\
	       (csb)->cs, (csb)->cc, (csb)->ce,			\
	       be32_to_cpu((csb)->count))

/* Same as CSB_ERR(), with the CSB address field appended to @msg. */
#define CSB_ERR_ADDR(csb, msg, ...)				\
	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,		\
		(unsigned long)be64_to_cpu((csb)->address))
183
wait_for_csb(struct nx842_workmem * wmem,struct coprocessor_status_block * csb)184 static int wait_for_csb(struct nx842_workmem *wmem,
185 struct coprocessor_status_block *csb)
186 {
187 ktime_t start = wmem->start, now = ktime_get();
188 ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);
189
190 while (!(READ_ONCE(csb->flags) & CSB_V)) {
191 cpu_relax();
192 now = ktime_get();
193 if (ktime_after(now, timeout))
194 break;
195 }
196
197 /* hw has updated csb and output buffer */
198 barrier();
199
200 /* check CSB flags */
201 if (!(csb->flags & CSB_V)) {
202 CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
203 (long)ktime_us_delta(now, start));
204 return -ETIMEDOUT;
205 }
206 if (csb->flags & CSB_F) {
207 CSB_ERR(csb, "Invalid CSB format");
208 return -EPROTO;
209 }
210 if (csb->flags & CSB_CH) {
211 CSB_ERR(csb, "Invalid CSB chaining state");
212 return -EPROTO;
213 }
214
215 /* verify CSB completion sequence is 0 */
216 if (csb->cs) {
217 CSB_ERR(csb, "Invalid CSB completion sequence");
218 return -EPROTO;
219 }
220
221 /* check CSB Completion Code */
222 switch (csb->cc) {
223 /* no error */
224 case CSB_CC_SUCCESS:
225 break;
226 case CSB_CC_TPBC_GT_SPBC:
227 /* not an error, but the compressed data is
228 * larger than the uncompressed data :(
229 */
230 break;
231
232 /* input data errors */
233 case CSB_CC_OPERAND_OVERLAP:
234 /* input and output buffers overlap */
235 CSB_ERR(csb, "Operand Overlap error");
236 return -EINVAL;
237 case CSB_CC_INVALID_OPERAND:
238 CSB_ERR(csb, "Invalid operand");
239 return -EINVAL;
240 case CSB_CC_NOSPC:
241 /* output buffer too small */
242 return -ENOSPC;
243 case CSB_CC_ABORT:
244 CSB_ERR(csb, "Function aborted");
245 return -EINTR;
246 case CSB_CC_CRC_MISMATCH:
247 CSB_ERR(csb, "CRC mismatch");
248 return -EINVAL;
249 case CSB_CC_TEMPL_INVALID:
250 CSB_ERR(csb, "Compressed data template invalid");
251 return -EINVAL;
252 case CSB_CC_TEMPL_OVERFLOW:
253 CSB_ERR(csb, "Compressed data template shows data past end");
254 return -EINVAL;
255 case CSB_CC_EXCEED_BYTE_COUNT: /* P9 or later */
256 /*
257 * DDE byte count exceeds the limit specified in Maximum
258 * byte count register.
259 */
260 CSB_ERR(csb, "DDE byte count exceeds the limit");
261 return -EINVAL;
262
263 /* these should not happen */
264 case CSB_CC_INVALID_ALIGN:
265 /* setup_ddl should have detected this */
266 CSB_ERR_ADDR(csb, "Invalid alignment");
267 return -EINVAL;
268 case CSB_CC_DATA_LENGTH:
269 /* setup_ddl should have detected this */
270 CSB_ERR(csb, "Invalid data length");
271 return -EINVAL;
272 case CSB_CC_WR_TRANSLATION:
273 case CSB_CC_TRANSLATION:
274 case CSB_CC_TRANSLATION_DUP1:
275 case CSB_CC_TRANSLATION_DUP2:
276 case CSB_CC_TRANSLATION_DUP3:
277 case CSB_CC_TRANSLATION_DUP4:
278 case CSB_CC_TRANSLATION_DUP5:
279 case CSB_CC_TRANSLATION_DUP6:
280 /* should not happen, we use physical addrs */
281 CSB_ERR_ADDR(csb, "Translation error");
282 return -EPROTO;
283 case CSB_CC_WR_PROTECTION:
284 case CSB_CC_PROTECTION:
285 case CSB_CC_PROTECTION_DUP1:
286 case CSB_CC_PROTECTION_DUP2:
287 case CSB_CC_PROTECTION_DUP3:
288 case CSB_CC_PROTECTION_DUP4:
289 case CSB_CC_PROTECTION_DUP5:
290 case CSB_CC_PROTECTION_DUP6:
291 /* should not happen, we use physical addrs */
292 CSB_ERR_ADDR(csb, "Protection error");
293 return -EPROTO;
294 case CSB_CC_PRIVILEGE:
295 /* shouldn't happen, we're in HYP mode */
296 CSB_ERR(csb, "Insufficient Privilege error");
297 return -EPROTO;
298 case CSB_CC_EXCESSIVE_DDE:
299 /* shouldn't happen, setup_ddl doesn't use many dde's */
300 CSB_ERR(csb, "Too many DDEs in DDL");
301 return -EINVAL;
302 case CSB_CC_TRANSPORT:
303 case CSB_CC_INVALID_CRB: /* P9 or later */
304 /* shouldn't happen, we setup CRB correctly */
305 CSB_ERR(csb, "Invalid CRB");
306 return -EINVAL;
307 case CSB_CC_INVALID_DDE: /* P9 or later */
308 /*
309 * shouldn't happen, setup_direct/indirect_dde creates
310 * DDE right
311 */
312 CSB_ERR(csb, "Invalid DDE");
313 return -EINVAL;
314 case CSB_CC_SEGMENTED_DDL:
315 /* shouldn't happen, setup_ddl creates DDL right */
316 CSB_ERR(csb, "Segmented DDL error");
317 return -EINVAL;
318 case CSB_CC_DDE_OVERFLOW:
319 /* shouldn't happen, setup_ddl creates DDL right */
320 CSB_ERR(csb, "DDE overflow error");
321 return -EINVAL;
322 case CSB_CC_SESSION:
323 /* should not happen with ICSWX */
324 CSB_ERR(csb, "Session violation error");
325 return -EPROTO;
326 case CSB_CC_CHAIN:
327 /* should not happen, we don't use chained CRBs */
328 CSB_ERR(csb, "Chained CRB error");
329 return -EPROTO;
330 case CSB_CC_SEQUENCE:
331 /* should not happen, we don't use chained CRBs */
332 CSB_ERR(csb, "CRB sequence number error");
333 return -EPROTO;
334 case CSB_CC_UNKNOWN_CODE:
335 CSB_ERR(csb, "Unknown subfunction code");
336 return -EPROTO;
337
338 /* hardware errors */
339 case CSB_CC_RD_EXTERNAL:
340 case CSB_CC_RD_EXTERNAL_DUP1:
341 case CSB_CC_RD_EXTERNAL_DUP2:
342 case CSB_CC_RD_EXTERNAL_DUP3:
343 CSB_ERR_ADDR(csb, "Read error outside coprocessor");
344 return -EPROTO;
345 case CSB_CC_WR_EXTERNAL:
346 CSB_ERR_ADDR(csb, "Write error outside coprocessor");
347 return -EPROTO;
348 case CSB_CC_INTERNAL:
349 CSB_ERR(csb, "Internal error in coprocessor");
350 return -EPROTO;
351 case CSB_CC_PROVISION:
352 CSB_ERR(csb, "Storage provision error");
353 return -EPROTO;
354 case CSB_CC_HW:
355 CSB_ERR(csb, "Correctable hardware error");
356 return -EPROTO;
357 case CSB_CC_HW_EXPIRED_TIMER: /* P9 or later */
358 CSB_ERR(csb, "Job did not finish within allowed time");
359 return -EPROTO;
360
361 default:
362 CSB_ERR(csb, "Invalid CC %d", csb->cc);
363 return -EPROTO;
364 }
365
366 /* check Completion Extension state */
367 if (csb->ce & CSB_CE_TERMINATION) {
368 CSB_ERR(csb, "CSB request was terminated");
369 return -EPROTO;
370 }
371 if (csb->ce & CSB_CE_INCOMPLETE) {
372 CSB_ERR(csb, "CSB request not complete");
373 return -EPROTO;
374 }
375 if (!(csb->ce & CSB_CE_TPBC)) {
376 CSB_ERR(csb, "TPBC not provided, unknown target length");
377 return -EPROTO;
378 }
379
380 /* successful completion */
381 pr_debug_ratelimited("Processed %u bytes in %lu us\n",
382 be32_to_cpu(csb->count),
383 (unsigned long)ktime_us_delta(now, start));
384
385 return 0;
386 }
387
nx842_config_crb(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int outlen,struct nx842_workmem * wmem)388 static int nx842_config_crb(const unsigned char *in, unsigned int inlen,
389 unsigned char *out, unsigned int outlen,
390 struct nx842_workmem *wmem)
391 {
392 struct coprocessor_request_block *crb;
393 struct coprocessor_status_block *csb;
394 u64 csb_addr;
395 int ret;
396
397 crb = &wmem->crb;
398 csb = &crb->csb;
399
400 /* Clear any previous values */
401 memset(crb, 0, sizeof(*crb));
402
403 /* set up DDLs */
404 ret = setup_ddl(&crb->source, wmem->ddl_in,
405 (unsigned char *)in, inlen, true);
406 if (ret)
407 return ret;
408
409 ret = setup_ddl(&crb->target, wmem->ddl_out,
410 out, outlen, false);
411 if (ret)
412 return ret;
413
414 /* set up CRB's CSB addr */
415 csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
416 csb_addr |= CRB_CSB_AT; /* Addrs are phys */
417 crb->csb_addr = cpu_to_be64(csb_addr);
418
419 return 0;
420 }
421
422 /**
423 * nx842_exec_icswx - compress/decompress data using the 842 algorithm
424 *
425 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
426 * This compresses or decompresses the provided input buffer into the provided
427 * output buffer.
428 *
429 * Upon return from this function @outlen contains the length of the
430 * output data. If there is an error then @outlen will be 0 and an
431 * error will be specified by the return code from this function.
432 *
433 * The @workmem buffer should only be used by one function call at a time.
434 *
435 * @in: input buffer pointer
436 * @inlen: input buffer size
437 * @out: output buffer pointer
438 * @outlenp: output buffer size pointer
439 * @workmem: working memory buffer pointer, size determined by
440 * nx842_powernv_driver.workmem_size
441 * @fc: function code, see CCW Function Codes in nx-842.h
442 *
443 * Returns:
444 * 0 Success, output of length @outlenp stored in the buffer at @out
445 * -ENODEV Hardware unavailable
446 * -ENOSPC Output buffer is to small
447 * -EMSGSIZE Input buffer too large
448 * -EINVAL buffer constraints do not fix nx842_constraints
449 * -EPROTO hardware error during operation
450 * -ETIMEDOUT hardware did not complete operation in reasonable time
451 * -EINTR operation was aborted
452 */
nx842_exec_icswx(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)453 static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen,
454 unsigned char *out, unsigned int *outlenp,
455 void *workmem, int fc)
456 {
457 struct coprocessor_request_block *crb;
458 struct coprocessor_status_block *csb;
459 struct nx842_workmem *wmem;
460 int ret;
461 u32 ccw;
462 unsigned int outlen = *outlenp;
463
464 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
465
466 *outlenp = 0;
467
468 /* shoudn't happen, we don't load without a coproc */
469 if (!nx842_ct) {
470 pr_err_ratelimited("coprocessor CT is 0");
471 return -ENODEV;
472 }
473
474 ret = nx842_config_crb(in, inlen, out, outlen, wmem);
475 if (ret)
476 return ret;
477
478 crb = &wmem->crb;
479 csb = &crb->csb;
480
481 /* set up CCW */
482 ccw = 0;
483 ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
484 ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
485 ccw = SET_FIELD(CCW_FC_842, ccw, fc);
486
487 wmem->start = ktime_get();
488
489 /* do ICSWX */
490 ret = icswx(cpu_to_be32(ccw), crb);
491
492 pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
493 (unsigned int)ccw,
494 (unsigned int)be32_to_cpu(crb->ccw));
495
496 /*
497 * NX842 coprocessor sets 3rd bit in CR register with XER[S0].
498 * XER[S0] is the integer summary overflow bit which is nothing
499 * to do NX. Since this bit can be set with other return values,
500 * mask this bit.
501 */
502 ret &= ~ICSWX_XERS0;
503
504 switch (ret) {
505 case ICSWX_INITIATED:
506 ret = wait_for_csb(wmem, csb);
507 break;
508 case ICSWX_BUSY:
509 pr_debug_ratelimited("842 Coprocessor busy\n");
510 ret = -EBUSY;
511 break;
512 case ICSWX_REJECTED:
513 pr_err_ratelimited("ICSWX rejected\n");
514 ret = -EPROTO;
515 break;
516 }
517
518 if (!ret)
519 *outlenp = be32_to_cpu(csb->count);
520
521 return ret;
522 }
523
524 /**
525 * nx842_exec_vas - compress/decompress data using the 842 algorithm
526 *
527 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
528 * This compresses or decompresses the provided input buffer into the provided
529 * output buffer.
530 *
531 * Upon return from this function @outlen contains the length of the
532 * output data. If there is an error then @outlen will be 0 and an
533 * error will be specified by the return code from this function.
534 *
535 * The @workmem buffer should only be used by one function call at a time.
536 *
537 * @in: input buffer pointer
538 * @inlen: input buffer size
539 * @out: output buffer pointer
540 * @outlenp: output buffer size pointer
541 * @workmem: working memory buffer pointer, size determined by
542 * nx842_powernv_driver.workmem_size
543 * @fc: function code, see CCW Function Codes in nx-842.h
544 *
545 * Returns:
546 * 0 Success, output of length @outlenp stored in the buffer
547 * at @out
548 * -ENODEV Hardware unavailable
549 * -ENOSPC Output buffer is to small
550 * -EMSGSIZE Input buffer too large
551 * -EINVAL buffer constraints do not fix nx842_constraints
552 * -EPROTO hardware error during operation
553 * -ETIMEDOUT hardware did not complete operation in reasonable time
554 * -EINTR operation was aborted
555 */
nx842_exec_vas(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)556 static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
557 unsigned char *out, unsigned int *outlenp,
558 void *workmem, int fc)
559 {
560 struct coprocessor_request_block *crb;
561 struct coprocessor_status_block *csb;
562 struct nx842_workmem *wmem;
563 struct vas_window *txwin;
564 int ret, i = 0;
565 u32 ccw;
566 unsigned int outlen = *outlenp;
567
568 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
569
570 *outlenp = 0;
571
572 crb = &wmem->crb;
573 csb = &crb->csb;
574
575 ret = nx842_config_crb(in, inlen, out, outlen, wmem);
576 if (ret)
577 return ret;
578
579 ccw = 0;
580 ccw = SET_FIELD(CCW_FC_842, ccw, fc);
581 crb->ccw = cpu_to_be32(ccw);
582
583 do {
584 wmem->start = ktime_get();
585 preempt_disable();
586 txwin = this_cpu_read(cpu_txwin);
587
588 /*
589 * VAS copy CRB into L2 cache. Refer <asm/vas.h>.
590 * @crb and @offset.
591 */
592 vas_copy_crb(crb, 0);
593
594 /*
595 * VAS paste previously copied CRB to NX.
596 * @txwin, @offset and @last (must be true).
597 */
598 ret = vas_paste_crb(txwin, 0, 1);
599 preempt_enable();
600 /*
601 * Retry copy/paste function for VAS failures.
602 */
603 } while (ret && (i++ < VAS_RETRIES));
604
605 if (ret) {
606 pr_err_ratelimited("VAS copy/paste failed\n");
607 return ret;
608 }
609
610 ret = wait_for_csb(wmem, csb);
611 if (!ret)
612 *outlenp = be32_to_cpu(csb->count);
613
614 return ret;
615 }
616
617 /**
618 * nx842_powernv_compress - Compress data using the 842 algorithm
619 *
620 * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
621 * The input buffer is compressed and the result is stored in the
622 * provided output buffer.
623 *
624 * Upon return from this function @outlen contains the length of the
625 * compressed data. If there is an error then @outlen will be 0 and an
626 * error will be specified by the return code from this function.
627 *
628 * @in: input buffer pointer
629 * @inlen: input buffer size
630 * @out: output buffer pointer
631 * @outlenp: output buffer size pointer
632 * @wmem: working memory buffer pointer, size determined by
633 * nx842_powernv_driver.workmem_size
634 *
635 * Returns: see @nx842_powernv_exec()
636 */
nx842_powernv_compress(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * wmem)637 static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
638 unsigned char *out, unsigned int *outlenp,
639 void *wmem)
640 {
641 return nx842_powernv_exec(in, inlen, out, outlenp,
642 wmem, CCW_FC_842_COMP_CRC);
643 }
644
645 /**
646 * nx842_powernv_decompress - Decompress data using the 842 algorithm
647 *
648 * Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
649 * The input buffer is decompressed and the result is stored in the
650 * provided output buffer.
651 *
652 * Upon return from this function @outlen contains the length of the
653 * decompressed data. If there is an error then @outlen will be 0 and an
654 * error will be specified by the return code from this function.
655 *
656 * @in: input buffer pointer
657 * @inlen: input buffer size
658 * @out: output buffer pointer
659 * @outlenp: output buffer size pointer
660 * @wmem: working memory buffer pointer, size determined by
661 * nx842_powernv_driver.workmem_size
662 *
663 * Returns: see @nx842_powernv_exec()
664 */
nx842_powernv_decompress(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * wmem)665 static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
666 unsigned char *out, unsigned int *outlenp,
667 void *wmem)
668 {
669 return nx842_powernv_exec(in, inlen, out, outlenp,
670 wmem, CCW_FC_842_DECOMP_CRC);
671 }
672
nx_add_coprocs_list(struct nx_coproc * coproc,int chipid)673 static inline void nx_add_coprocs_list(struct nx_coproc *coproc,
674 int chipid)
675 {
676 coproc->chip_id = chipid;
677 INIT_LIST_HEAD(&coproc->list);
678 list_add(&coproc->list, &nx_coprocs);
679 }
680
nx_alloc_txwin(struct nx_coproc * coproc)681 static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc)
682 {
683 struct vas_window *txwin = NULL;
684 struct vas_tx_win_attr txattr;
685
686 /*
687 * Kernel requests will be high priority. So open send
688 * windows only for high priority RxFIFO entries.
689 */
690 vas_init_tx_win_attr(&txattr, coproc->ct);
691 txattr.lpid = 0; /* lpid is 0 for kernel requests */
692
693 /*
694 * Open a VAS send window which is used to send request to NX.
695 */
696 txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
697 if (IS_ERR(txwin))
698 pr_err("ibm,nx-842: Can not open TX window: %ld\n",
699 PTR_ERR(txwin));
700
701 return txwin;
702 }
703
704 /*
705 * Identify chip ID for each CPU, open send wndow for the corresponding NX
706 * engine and save txwin in percpu cpu_txwin.
707 * cpu_txwin is used in copy/paste operation for each compression /
708 * decompression request.
709 */
nx_open_percpu_txwins(void)710 static int nx_open_percpu_txwins(void)
711 {
712 struct nx_coproc *coproc, *n;
713 unsigned int i, chip_id;
714
715 for_each_possible_cpu(i) {
716 struct vas_window *txwin = NULL;
717
718 chip_id = cpu_to_chip_id(i);
719
720 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
721 /*
722 * Kernel requests use only high priority FIFOs. So
723 * open send windows for these FIFOs.
724 * GZIP is not supported in kernel right now.
725 */
726
727 if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
728 continue;
729
730 if (coproc->chip_id == chip_id) {
731 txwin = nx_alloc_txwin(coproc);
732 if (IS_ERR(txwin))
733 return PTR_ERR(txwin);
734
735 per_cpu(cpu_txwin, i) = txwin;
736 break;
737 }
738 }
739
740 if (!per_cpu(cpu_txwin, i)) {
741 /* shouldn't happen, Each chip will have NX engine */
742 pr_err("NX engine is not available for CPU %d\n", i);
743 return -EINVAL;
744 }
745 }
746
747 return 0;
748 }
749
nx_set_ct(struct nx_coproc * coproc,const char * priority,int high,int normal)750 static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority,
751 int high, int normal)
752 {
753 if (!strcmp(priority, "High"))
754 coproc->ct = high;
755 else if (!strcmp(priority, "Normal"))
756 coproc->ct = normal;
757 else {
758 pr_err("Invalid RxFIFO priority value\n");
759 return -EINVAL;
760 }
761
762 return 0;
763 }
764
vas_cfg_coproc_info(struct device_node * dn,int chip_id,int vasid,int type,int * ct)765 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
766 int vasid, int type, int *ct)
767 {
768 struct vas_window *rxwin = NULL;
769 struct vas_rx_win_attr rxattr;
770 u32 lpid, pid, tid, fifo_size;
771 struct nx_coproc *coproc;
772 u64 rx_fifo;
773 const char *priority;
774 int ret;
775
776 ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo);
777 if (ret) {
778 pr_err("Missing rx-fifo-address property\n");
779 return ret;
780 }
781
782 ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size);
783 if (ret) {
784 pr_err("Missing rx-fifo-size property\n");
785 return ret;
786 }
787
788 ret = of_property_read_u32(dn, "lpid", &lpid);
789 if (ret) {
790 pr_err("Missing lpid property\n");
791 return ret;
792 }
793
794 ret = of_property_read_u32(dn, "pid", &pid);
795 if (ret) {
796 pr_err("Missing pid property\n");
797 return ret;
798 }
799
800 ret = of_property_read_u32(dn, "tid", &tid);
801 if (ret) {
802 pr_err("Missing tid property\n");
803 return ret;
804 }
805
806 ret = of_property_read_string(dn, "priority", &priority);
807 if (ret) {
808 pr_err("Missing priority property\n");
809 return ret;
810 }
811
812 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
813 if (!coproc)
814 return -ENOMEM;
815
816 if (type == NX_CT_842)
817 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI,
818 VAS_COP_TYPE_842);
819 else if (type == NX_CT_GZIP)
820 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI,
821 VAS_COP_TYPE_GZIP);
822
823 if (ret)
824 goto err_out;
825
826 vas_init_rx_win_attr(&rxattr, coproc->ct);
827 rxattr.rx_fifo = rx_fifo;
828 rxattr.rx_fifo_size = fifo_size;
829 rxattr.lnotify_lpid = lpid;
830 rxattr.lnotify_pid = pid;
831 rxattr.lnotify_tid = tid;
832 /*
833 * Maximum RX window credits can not be more than #CRBs in
834 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns.
835 */
836 rxattr.wcreds_max = fifo_size / CRB_SIZE;
837
838 /*
839 * Open a VAS receice window which is used to configure RxFIFO
840 * for NX.
841 */
842 rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr);
843 if (IS_ERR(rxwin)) {
844 ret = PTR_ERR(rxwin);
845 pr_err("setting RxFIFO with VAS failed: %d\n",
846 ret);
847 goto err_out;
848 }
849
850 coproc->vas.rxwin = rxwin;
851 coproc->vas.id = vasid;
852 nx_add_coprocs_list(coproc, chip_id);
853
854 /*
855 * (lpid, pid, tid) combination has to be unique for each
856 * coprocessor instance in the system. So to make it
857 * unique, skiboot uses coprocessor type such as 842 or
858 * GZIP for pid and provides this value to kernel in pid
859 * device-tree property.
860 */
861 *ct = pid;
862
863 return 0;
864
865 err_out:
866 kfree(coproc);
867 return ret;
868 }
869
nx_coproc_init(int chip_id,int ct_842,int ct_gzip)870 static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip)
871 {
872 int ret = 0;
873
874 if (opal_check_token(OPAL_NX_COPROC_INIT)) {
875 ret = opal_nx_coproc_init(chip_id, ct_842);
876
877 if (!ret)
878 ret = opal_nx_coproc_init(chip_id, ct_gzip);
879
880 if (ret) {
881 ret = opal_error_code(ret);
882 pr_err("Failed to initialize NX for chip(%d): %d\n",
883 chip_id, ret);
884 }
885 } else
886 pr_warn("Firmware doesn't support NX initialization\n");
887
888 return ret;
889 }
890
find_nx_device_tree(struct device_node * dn,int chip_id,int vasid,int type,char * devname,int * ct)891 static int __init find_nx_device_tree(struct device_node *dn, int chip_id,
892 int vasid, int type, char *devname,
893 int *ct)
894 {
895 int ret = 0;
896
897 if (of_device_is_compatible(dn, devname)) {
898 ret = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct);
899 if (ret)
900 of_node_put(dn);
901 }
902
903 return ret;
904 }
905
nx_powernv_probe_vas(struct device_node * pn)906 static int __init nx_powernv_probe_vas(struct device_node *pn)
907 {
908 int chip_id, vasid, ret = 0;
909 int ct_842 = 0, ct_gzip = 0;
910 struct device_node *dn;
911
912 chip_id = of_get_ibm_chip_id(pn);
913 if (chip_id < 0) {
914 pr_err("ibm,chip-id missing\n");
915 return -EINVAL;
916 }
917
918 vasid = chip_to_vas_id(chip_id);
919 if (vasid < 0) {
920 pr_err("Unable to map chip_id %d to vasid\n", chip_id);
921 return -EINVAL;
922 }
923
924 for_each_child_of_node(pn, dn) {
925 ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842,
926 "ibm,p9-nx-842", &ct_842);
927
928 if (!ret)
929 ret = find_nx_device_tree(dn, chip_id, vasid,
930 NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip);
931
932 if (ret) {
933 of_node_put(dn);
934 return ret;
935 }
936 }
937
938 if (!ct_842 || !ct_gzip) {
939 pr_err("NX FIFO nodes are missing\n");
940 return -EINVAL;
941 }
942
943 /*
944 * Initialize NX instance for both high and normal priority FIFOs.
945 */
946 ret = nx_coproc_init(chip_id, ct_842, ct_gzip);
947
948 return ret;
949 }
950
nx842_powernv_probe(struct device_node * dn)951 static int __init nx842_powernv_probe(struct device_node *dn)
952 {
953 struct nx_coproc *coproc;
954 unsigned int ct, ci;
955 int chip_id;
956
957 chip_id = of_get_ibm_chip_id(dn);
958 if (chip_id < 0) {
959 pr_err("ibm,chip-id missing\n");
960 return -EINVAL;
961 }
962
963 if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) {
964 pr_err("ibm,842-coprocessor-type missing\n");
965 return -EINVAL;
966 }
967
968 if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) {
969 pr_err("ibm,842-coprocessor-instance missing\n");
970 return -EINVAL;
971 }
972
973 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
974 if (!coproc)
975 return -ENOMEM;
976
977 coproc->ct = ct;
978 coproc->ci = ci;
979 nx_add_coprocs_list(coproc, chip_id);
980
981 pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);
982
983 if (!nx842_ct)
984 nx842_ct = ct;
985 else if (nx842_ct != ct)
986 pr_err("NX842 chip %d, CT %d != first found CT %d\n",
987 chip_id, ct, nx842_ct);
988
989 return 0;
990 }
991
nx_delete_coprocs(void)992 static void nx_delete_coprocs(void)
993 {
994 struct nx_coproc *coproc, *n;
995 struct vas_window *txwin;
996 int i;
997
998 /*
999 * close percpu txwins that are opened for the corresponding coproc.
1000 */
1001 for_each_possible_cpu(i) {
1002 txwin = per_cpu(cpu_txwin, i);
1003 if (txwin)
1004 vas_win_close(txwin);
1005
1006 per_cpu(cpu_txwin, i) = NULL;
1007 }
1008
1009 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
1010 if (coproc->vas.rxwin)
1011 vas_win_close(coproc->vas.rxwin);
1012
1013 list_del(&coproc->list);
1014 kfree(coproc);
1015 }
1016 }
1017
/*
 * Buffer constraints published through nx842_powernv_driver. They mirror
 * the checks in setup_ddl(): address alignment, length multiple, and the
 * number of DDL entries available.
 */
static struct nx842_constraints nx842_powernv_constraints = {
	.alignment =	DDE_BUFFER_ALIGN,
	.multiple =	DDE_BUFFER_LAST_MULT,
	.minimum =	DDE_BUFFER_LAST_MULT,
	/*
	 * NOTE(review): presumably one entry short of DDL_LEN_MAX so that a
	 * buffer not starting on a page boundary still fits - confirm.
	 */
	.maximum =	(DDL_LEN_MAX - 1) * PAGE_SIZE,
};
1024
/* Driver description handed to nx842_crypto_init() for each transform. */
static struct nx842_driver nx842_powernv_driver = {
	.name =		KBUILD_MODNAME,
	.owner =	THIS_MODULE,
	/* includes the padding that lets the exec functions realign it */
	.workmem_size =	sizeof(struct nx842_workmem),
	.constraints =	&nx842_powernv_constraints,
	.compress =	nx842_powernv_compress,
	.decompress =	nx842_powernv_decompress,
};
1033
nx842_powernv_crypto_init(struct crypto_tfm * tfm)1034 static int nx842_powernv_crypto_init(struct crypto_tfm *tfm)
1035 {
1036 return nx842_crypto_init(tfm, &nx842_powernv_driver);
1037 }
1038
/*
 * Crypto API description of the hardware "842" compressor. It is
 * registered at module init and unregistered at module exit.
 */
static struct crypto_alg nx842_powernv_alg = {
	.cra_name		= "842",
	.cra_driver_name	= "842-nx",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
	.cra_ctxsize		= sizeof(struct nx842_crypto_ctx),
	.cra_module		= THIS_MODULE,
	.cra_init		= nx842_powernv_crypto_init,
	.cra_exit		= nx842_crypto_exit,
	.cra_u			= { .compress = {
	.coa_compress		= nx842_crypto_compress,
	.coa_decompress		= nx842_crypto_decompress } }
};
1052
nx_compress_powernv_init(void)1053 static __init int nx_compress_powernv_init(void)
1054 {
1055 struct device_node *dn;
1056 int ret;
1057
1058 /* verify workmem size/align restrictions */
1059 BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
1060 BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
1061 BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
1062 /* verify buffer size/align restrictions */
1063 BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
1064 BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
1065 BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
1066
1067 for_each_compatible_node(dn, NULL, "ibm,power9-nx") {
1068 ret = nx_powernv_probe_vas(dn);
1069 if (ret) {
1070 nx_delete_coprocs();
1071 of_node_put(dn);
1072 return ret;
1073 }
1074 }
1075
1076 if (list_empty(&nx_coprocs)) {
1077 for_each_compatible_node(dn, NULL, "ibm,power-nx")
1078 nx842_powernv_probe(dn);
1079
1080 if (!nx842_ct)
1081 return -ENODEV;
1082
1083 nx842_powernv_exec = nx842_exec_icswx;
1084 } else {
1085 /*
1086 * Register VAS user space API for NX GZIP so
1087 * that user space can use GZIP engine.
1088 * Using high FIFO priority for kernel requests and
1089 * normal FIFO priority is assigned for userspace.
1090 * 842 compression is supported only in kernel.
1091 */
1092 ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP,
1093 "nx-gzip");
1094
1095 /*
1096 * GZIP is not supported in kernel right now.
1097 * So open tx windows only for 842.
1098 */
1099 if (!ret)
1100 ret = nx_open_percpu_txwins();
1101
1102 if (ret) {
1103 nx_delete_coprocs();
1104 return ret;
1105 }
1106
1107 nx842_powernv_exec = nx842_exec_vas;
1108 }
1109
1110 ret = crypto_register_alg(&nx842_powernv_alg);
1111 if (ret) {
1112 nx_delete_coprocs();
1113 return ret;
1114 }
1115
1116 return 0;
1117 }
1118 module_init(nx_compress_powernv_init);
1119
nx_compress_powernv_exit(void)1120 static void __exit nx_compress_powernv_exit(void)
1121 {
1122 /*
1123 * GZIP engine is supported only in power9 or later and nx842_ct
1124 * is used on power8 (icswx).
1125 * VAS API for NX GZIP is registered during init for user space
1126 * use. So delete this API use for GZIP engine.
1127 */
1128 if (!nx842_ct)
1129 vas_unregister_api_powernv();
1130
1131 crypto_unregister_alg(&nx842_powernv_alg);
1132
1133 nx_delete_coprocs();
1134 }
1135 module_exit(nx_compress_powernv_exit);
1136