// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cryptographic API for the NX-842 hardware compression.
 *
 * Copyright (C) IBM Corporation, 2011-2015
 *
 * Designer of the Power data compression engine:
 *   Bulent Abali <abali@us.ibm.com>
 *
 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
 *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
 *
 * Rewrite: Dan Streetman <ddstreet@ieee.org>
 *
 * This is an interface to the NX-842 compression hardware in PowerPC
 * processors.  Most of the complexity of this driver is due to the fact that
 * the NX-842 compression hardware requires the input and output data buffers
 * to be specifically aligned, to be a specific multiple in length, and within
 * specific minimum and maximum lengths.  Those restrictions, provided by the
 * nx-842 driver via nx842_constraints, mean this driver must use bounce
 * buffers and headers to correct misaligned input or output buffers, and to
 * split input buffers that are too large.
 *
 * This driver will fall back to software decompression if the hardware
 * decompression fails, so this driver's decompression should never fail as
 * long as the provided compressed buffer is valid.  Any compressed buffer
 * created by this driver will have a header (except ones where the input
 * perfectly matches the constraints); so users of this driver cannot simply
 * pass a compressed buffer created by this driver over to the 842 software
 * decompression library.  Instead, users must use this driver to decompress;
 * if the hardware fails or is unavailable, the compressed buffer will be
 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
 * software decompression library.
 *
 * This does not fall back to software compression, however, since the caller
 * of this function is specifically requesting hardware compression; if the
 * hardware compression fails, the caller can fall back to software
 * compression, and the raw 842 compressed buffer that the software compressor
 * creates can be passed to this driver for hardware decompression; any
 * buffer without our specific header magic is assumed to be a raw 842 buffer
 * and passed directly to the hardware.  Note that the software compression
 * library will produce a compressed buffer that is incompatible with the
 * hardware decompressor if the original input buffer length is not a multiple
 * of 8; if such a compressed buffer is passed to this driver for
 * decompression, the hardware will reject it and this driver will then pass
 * it over to the software library for decompression.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/vmalloc.h>
#include <linux/sw842.h>
#include <linux/spinlock.h>

#include "nx-842.h"

/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
 * template (see lib/842/842.h), so this magic number will never appear at
 * the start of a raw 842 compressed buffer.  That is important, as any buffer
 * passed to us without this magic is assumed to be a raw 842 compressed
 * buffer, and passed directly to the hardware to decompress.
 */
#define NX842_CRYPTO_MAGIC	(0xf842)
#define NX842_CRYPTO_HEADER_SIZE(g)				\
	(sizeof(struct nx842_crypto_header) +			\
	 sizeof(struct nx842_crypto_header_group) * (g))
#define NX842_CRYPTO_HEADER_MAX_SIZE				\
	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
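
/* Roughly, a compressed buffer that includes a header is laid out as the
 * header itself (magic, ignore, groups, and one group descriptor per group),
 * followed by each group's raw 842 data; group[n].padding counts the bytes
 * (header and/or alignment pad) that precede that group's data, and
 * hdr->ignore counts trailing output bytes that were only added to satisfy
 * the hardware length constraints.  See struct nx842_crypto_header in
 * nx-842.h for the authoritative definition.
 */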

/* bounce buffer size */
#define BOUNCE_BUFFER_ORDER	(2)
#define BOUNCE_BUFFER_SIZE					\
	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))

/* try longer on comp because we can fall back to sw decomp if hw is busy */
#define COMP_BUSY_TIMEOUT	(250) /* ms */
#define DECOMP_BUSY_TIMEOUT	(50) /* ms */

struct nx842_crypto_param {
	u8 *in;
	unsigned int iremain;
	u8 *out;
	unsigned int oremain;
	unsigned int ototal;
};

static int update_param(struct nx842_crypto_param *p,
			unsigned int slen, unsigned int dlen)
{
	if (p->iremain < slen)
		return -EOVERFLOW;
	if (p->oremain < dlen)
		return -ENOSPC;

	p->in += slen;
	p->iremain -= slen;
	p->out += dlen;
	p->oremain -= dlen;
	p->ototal += dlen;

	return 0;
}

void *nx842_crypto_alloc_ctx(struct nx842_driver *driver)
{
	struct nx842_crypto_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&ctx->lock);
	ctx->driver = driver;
	ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
	ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
	ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
	if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
		kfree(ctx->wmem);
		free_page((unsigned long)ctx->sbounce);
		free_page((unsigned long)ctx->dbounce);
		kfree(ctx);
		return ERR_PTR(-ENOMEM);
	}

	return ctx;
}
EXPORT_SYMBOL_GPL(nx842_crypto_alloc_ctx);

void nx842_crypto_free_ctx(void *p)
{
	struct nx842_crypto_ctx *ctx = p;

	kfree(ctx->wmem);
	free_page((unsigned long)ctx->sbounce);
	free_page((unsigned long)ctx->dbounce);
}
EXPORT_SYMBOL_GPL(nx842_crypto_free_ctx);

static void check_constraints(struct nx842_constraints *c)
{
	/* limit maximum, to always have enough bounce buffer to decompress */
	if (c->maximum > BOUNCE_BUFFER_SIZE)
		c->maximum = BOUNCE_BUFFER_SIZE;
}

static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
{
	int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);

	/* compress should have added space for header */
	if (s > be16_to_cpu(hdr->group[0].padding)) {
		pr_err("Internal error: no space for header\n");
		return -EINVAL;
	}

	memcpy(buf, hdr, s);

	print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);

	return 0;
}

static int compress(struct nx842_crypto_ctx *ctx,
		    struct nx842_crypto_param *p,
		    struct nx842_crypto_header_group *g,
		    struct nx842_constraints *c,
		    u16 *ignore,
		    unsigned int hdrsize)
{
	unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	int ret, dskip = 0;
	ktime_t timeout;

	if (p->iremain == 0)
		return -EOVERFLOW;

	if (p->oremain == 0 || hdrsize + c->minimum > dlen)
		return -ENOSPC;

	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		adj_slen = slen = c->maximum;
	if (adj_slen > slen || (u64)src % c->alignment) {
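		/* Input is misaligned or shorter than the constraints allow:
		 * copy it into the preallocated sbounce buffer, zero-filling
		 * up to adj_slen so the hardware sees an acceptable length.
		 */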
		adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
		slen = min(slen, BOUNCE_BUFFER_SIZE);
		if (adj_slen > slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		slen = adj_slen;
		pr_debug("using comp sbounce buffer, len %x\n", slen);
	}

	dst += hdrsize;
	dlen -= hdrsize;

	if ((u64)dst % c->alignment) {
		dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
		dst += dskip;
		dlen -= dskip;
	}
	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < c->minimum) {
nospc:
		dst = ctx->dbounce;
		dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
		dlen = round_down(dlen, c->multiple);
		dskip = 0;
		pr_debug("using comp dbounce buffer, len %x\n", dlen);
	}
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
		/* possibly we should reduce the slen here, instead of
		 * retrying with the dbounce buffer?
		 */
		if (ret == -ENOSPC && dst != ctx->dbounce)
			goto nospc;
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret)
		return ret;

	dskip += hdrsize;

	if (dst == ctx->dbounce)
		memcpy(p->out + dskip, dst, dlen);

	g->padding = cpu_to_be16(dskip);
	g->compressed_length = cpu_to_be32(dlen);
	g->uncompressed_length = cpu_to_be32(slen);

	if (p->iremain < slen) {
		*ignore = slen - p->iremain;
		slen = p->iremain;
	}

	pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
		 slen, *ignore, dlen, dskip);

	return update_param(p, slen, dskip + dlen);
}

int nx842_crypto_compress(struct crypto_scomp *tfm,
			  const u8 *src, unsigned int slen,
			  u8 *dst, unsigned int *dlen, void *pctx)
{
	struct nx842_crypto_ctx *ctx = pctx;
	struct nx842_crypto_header *hdr =
		container_of(&ctx->header,
			     struct nx842_crypto_header, hdr);
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	unsigned int groups, hdrsize, h;
	int ret, n;
	bool add_header;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
		       DIV_ROUND_UP(p.iremain, c.maximum));
	hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);

	spin_lock_bh(&ctx->lock);

	/* skip adding header if the buffers meet all constraints */
	add_header = (p.iremain % c.multiple ||
		      p.iremain < c.minimum ||
		      p.iremain > c.maximum ||
		      (u64)p.in % c.alignment ||
		      p.oremain % c.multiple ||
		      p.oremain < c.minimum ||
		      p.oremain > c.maximum ||
		      (u64)p.out % c.alignment);

	hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
	hdr->groups = 0;
	hdr->ignore = 0;
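
	/* Compress the input as a series of groups, each covering at most
	 * c.maximum bytes of input; the first group leaves room for the
	 * header when one is needed.
	 */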
	while (p.iremain > 0) {
		n = hdr->groups++;
		ret = -ENOSPC;
		if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
			goto unlock;

		/* header goes before first group */
		h = !n && add_header ? hdrsize : 0;

		if (ignore)
			pr_warn("internal error, ignore is set %x\n", ignore);

		ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
		if (ret)
			goto unlock;
	}

	if (!add_header && hdr->groups > 1) {
		pr_err("Internal error: No header but multiple groups\n");
		ret = -EINVAL;
		goto unlock;
	}

	/* ignore indicates the input stream needed to be padded */
	hdr->ignore = cpu_to_be16(ignore);
	if (ignore)
		pr_debug("marked %d bytes as ignore\n", ignore);

	if (add_header)
		ret = nx842_crypto_add_header(hdr, dst);
	if (ret)
		goto unlock;

	*dlen = p.ototal;

	pr_debug("compress total slen %x dlen %x\n", slen, *dlen);

unlock:
	spin_unlock_bh(&ctx->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_compress);
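
/* Decompress a single group: skip the group's padding, bounce the input
 * and/or output through the preallocated buffers if the hardware constraints
 * require it, and fall back to the 842 software decompressor if the hardware
 * fails or the buffers cannot be made to fit the constraints.
 */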
static int decompress(struct nx842_crypto_ctx *ctx,
		      struct nx842_crypto_param *p,
		      struct nx842_crypto_header_group *g,
		      struct nx842_constraints *c,
		      u16 ignore)
{
	unsigned int slen = be32_to_cpu(g->compressed_length);
	unsigned int required_len = be32_to_cpu(g->uncompressed_length);
	unsigned int dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	u16 padding = be16_to_cpu(g->padding);
	int ret, spadding = 0;
	ktime_t timeout;

	if (!slen || !required_len)
		return -EINVAL;

	if (p->iremain <= 0 || padding + slen > p->iremain)
		return -EOVERFLOW;

	if (p->oremain <= 0 || required_len - ignore > p->oremain)
		return -ENOSPC;

	src += padding;

	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		goto usesw;
	if (slen < adj_slen || (u64)src % c->alignment) {
		/* we can append padding bytes because the 842 format defines
		 * an "end" template (see lib/842/842_decompress.c) and will
		 * ignore any bytes following it.
		 */
		if (slen < adj_slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		spadding = adj_slen - slen;
		slen = adj_slen;
		pr_debug("using decomp sbounce buffer, len %x\n", slen);
	}

	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < required_len || (u64)dst % c->alignment) {
		dst = ctx->dbounce;
		dlen = min(required_len, BOUNCE_BUFFER_SIZE);
		pr_debug("using decomp dbounce buffer, len %x\n", dlen);
	}
	if (dlen < c->minimum)
		goto usesw;
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret) {
usesw:
		/* reset everything, sw doesn't have constraints */
		src = p->in + padding;
		slen = be32_to_cpu(g->compressed_length);
		spadding = 0;
		dst = p->out;
		dlen = p->oremain;
		if (dlen < required_len) { /* have ignore bytes */
			dst = ctx->dbounce;
			dlen = BOUNCE_BUFFER_SIZE;
		}
		pr_info_ratelimited("using software 842 decompression\n");
		ret = sw842_decompress(src, slen, dst, &dlen);
	}
	if (ret)
		return ret;

	slen -= spadding;

	dlen -= ignore;
	if (ignore)
		pr_debug("ignoring last %x bytes\n", ignore);

	if (dst == ctx->dbounce)
		memcpy(p->out, dst, dlen);

	pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
		 slen, padding, dlen, ignore);

	return update_param(p, slen + padding, dlen);
}

int nx842_crypto_decompress(struct crypto_scomp *tfm,
			    const u8 *src, unsigned int slen,
			    u8 *dst, unsigned int *dlen, void *pctx)
{
	struct nx842_crypto_ctx *ctx = pctx;
	struct nx842_crypto_header *hdr;
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	int n, ret, hdr_len;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	hdr = (struct nx842_crypto_header *)src;

	spin_lock_bh(&ctx->lock);

	/* If it doesn't start with our header magic number, assume it's a raw
	 * 842 compressed buffer and pass it directly to the hardware driver
	 */
	if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
		struct nx842_crypto_header_group g = {
			.padding = 0,
			.compressed_length = cpu_to_be32(p.iremain),
			.uncompressed_length = cpu_to_be32(p.oremain),
		};

		ret = decompress(ctx, &p, &g, &c, 0);
		if (ret)
			goto unlock;

		goto success;
	}

	if (!hdr->groups) {
		pr_err("header has no groups\n");
		ret = -EINVAL;
		goto unlock;
	}
	if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
		pr_err("header has too many groups %x, max %x\n",
		       hdr->groups, NX842_CRYPTO_GROUP_MAX);
		ret = -EINVAL;
		goto unlock;
	}

	hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
	if (hdr_len > slen) {
		ret = -EOVERFLOW;
		goto unlock;
	}

	memcpy(&ctx->header, src, hdr_len);
	hdr = container_of(&ctx->header, struct nx842_crypto_header, hdr);
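
	/* Decompress each group in order; decompress() advances p.in past the
	 * group's padding and compressed data, and appends the group's
	 * uncompressed data to p.out.
	 */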
	for (n = 0; n < hdr->groups; n++) {
		/* ignore applies to last group */
		if (n + 1 == hdr->groups)
			ignore = be16_to_cpu(hdr->ignore);

		ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
		if (ret)
			goto unlock;
	}

success:
	*dlen = p.ototal;

	pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);

	ret = 0;

unlock:
	spin_unlock_bh(&ctx->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_decompress);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");