1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ 3 4 #include <linux/init.h> 5 #include <linux/kernel.h> 6 #include <linux/module.h> 7 #include <linux/pci.h> 8 #include <linux/device.h> 9 #include <linux/iommu.h> 10 #include <uapi/linux/idxd.h> 11 #include <linux/highmem.h> 12 #include <linux/sched/smt.h> 13 #include <crypto/internal/acompress.h> 14 15 #include "idxd.h" 16 #include "iaa_crypto.h" 17 #include "iaa_crypto_stats.h" 18 19 #ifdef pr_fmt 20 #undef pr_fmt 21 #endif 22 23 #define pr_fmt(fmt) "idxd: " IDXD_SUBDRIVER_NAME ": " fmt 24 25 #define IAA_ALG_PRIORITY 300 26 27 /* number of iaa instances probed */ 28 static unsigned int nr_iaa; 29 static unsigned int nr_cpus; 30 static unsigned int nr_nodes; 31 static unsigned int nr_cpus_per_node; 32 33 /* Number of physical cpus sharing each iaa instance */ 34 static unsigned int cpus_per_iaa; 35 36 static struct crypto_comp *deflate_generic_tfm; 37 38 /* Per-cpu lookup table for balanced wqs */ 39 static struct wq_table_entry __percpu *wq_table; 40 41 static struct idxd_wq *wq_table_next_wq(int cpu) 42 { 43 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); 44 45 if (++entry->cur_wq >= entry->n_wqs) 46 entry->cur_wq = 0; 47 48 if (!entry->wqs[entry->cur_wq]) 49 return NULL; 50 51 pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__, 52 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id, 53 entry->wqs[entry->cur_wq]->id, cpu); 54 55 return entry->wqs[entry->cur_wq]; 56 } 57 58 static void wq_table_add(int cpu, struct idxd_wq *wq) 59 { 60 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); 61 62 if (WARN_ON(entry->n_wqs == entry->max_wqs)) 63 return; 64 65 entry->wqs[entry->n_wqs++] = wq; 66 67 pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__, 68 entry->wqs[entry->n_wqs - 1]->idxd->id, 69 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu); 70 } 71 72 static void wq_table_free_entry(int cpu) 73 { 74 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); 75 76 kfree(entry->wqs); 77 memset(entry, 0, sizeof(*entry)); 78 } 79 80 static void wq_table_clear_entry(int cpu) 81 { 82 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); 83 84 entry->n_wqs = 0; 85 entry->cur_wq = 0; 86 memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *)); 87 } 88 89 LIST_HEAD(iaa_devices); 90 DEFINE_MUTEX(iaa_devices_lock); 91 92 /* If enabled, IAA hw crypto algos are registered, unavailable otherwise */ 93 static bool iaa_crypto_enabled; 94 static bool iaa_crypto_registered; 95 96 /* Verify results of IAA compress or not */ 97 static bool iaa_verify_compress = true; 98 99 static ssize_t verify_compress_show(struct device_driver *driver, char *buf) 100 { 101 return sprintf(buf, "%d\n", iaa_verify_compress); 102 } 103 104 static ssize_t verify_compress_store(struct device_driver *driver, 105 const char *buf, size_t count) 106 { 107 int ret = -EBUSY; 108 109 mutex_lock(&iaa_devices_lock); 110 111 if (iaa_crypto_enabled) 112 goto out; 113 114 ret = kstrtobool(buf, &iaa_verify_compress); 115 if (ret) 116 goto out; 117 118 ret = count; 119 out: 120 mutex_unlock(&iaa_devices_lock); 121 122 return ret; 123 } 124 static DRIVER_ATTR_RW(verify_compress); 125 126 /* 127 * The iaa crypto driver supports three 'sync' methods determining how 128 * compressions and decompressions are performed: 129 * 130 * - sync: the compression or decompression completes before 131 * returning. 
 *	   This is the mode used by the async crypto
 *	   interface when the sync mode is set to 'sync' and by
 *	   the sync crypto interface regardless of setting.
 *
 * - async: the compression or decompression is submitted and returns
 *	    immediately.  Completion interrupts are not used so
 *	    the caller is responsible for polling the descriptor
 *	    for completion.  This mode is applicable to only the
 *	    async crypto interface and is ignored for anything
 *	    else.
 *
 * - async_irq: the compression or decompression is submitted and
 *		returns immediately.  Completion interrupts are
 *		enabled so the caller can wait for the completion and
 *		yield to other threads.  When the compression or
 *		decompression completes, the completion is signaled
 *		and the caller awakened.  This mode is applicable to
 *		only the async crypto interface and is ignored for
 *		anything else.
 *
 * These modes can be set using the iaa_crypto sync_mode driver
 * attribute.
 */

/* Use async mode */
static bool async_mode;
/* Use interrupts */
static bool use_irq;

/**
 * set_iaa_sync_mode - Set IAA sync mode
 * @name: The name of the sync mode
 *
 * Make the IAA sync mode named @name the current sync mode used by
 * compression/decompression.
 */
static int set_iaa_sync_mode(const char *name)
{
	int ret = 0;

	if (sysfs_streq(name, "sync")) {
		async_mode = false;
		use_irq = false;
	} else if (sysfs_streq(name, "async")) {
		/*
		 * 'async' is accepted but currently behaves the same
		 * as 'sync': no completion polling is done here.
		 */
		async_mode = false;
		use_irq = false;
	} else if (sysfs_streq(name, "async_irq")) {
		async_mode = true;
		use_irq = true;
	} else {
		ret = -EINVAL;
	}

	return ret;
}

static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
{
	int ret = 0;

	if (!async_mode && !use_irq)
		ret = sprintf(buf, "%s\n", "sync");
	else if (async_mode && !use_irq)
		ret = sprintf(buf, "%s\n", "async");
	else if (async_mode && use_irq)
		ret = sprintf(buf, "%s\n", "async_irq");

	return ret;
}

static ssize_t sync_mode_store(struct device_driver *driver,
			       const char *buf, size_t count)
{
	int ret = -EBUSY;

	mutex_lock(&iaa_devices_lock);

	if (iaa_crypto_enabled)
		goto out;

	ret = set_iaa_sync_mode(buf);
	if (ret == 0)
		ret = count;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
}
static DRIVER_ATTR_RW(sync_mode);

static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];

/* Return the index of the first free slot, or -EINVAL if all are in use */
static int find_empty_iaa_compression_mode(void)
{
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		if (!iaa_compression_modes[i])
			return i;
	}

	return -EINVAL;
}

static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
{
	struct iaa_compression_mode *mode;
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		mode = iaa_compression_modes[i];
		if (!mode)
			continue;

		if (!strcmp(mode->name, name)) {
			*idx = i;
			return iaa_compression_modes[i];
		}
	}

	return NULL;
}

static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
{
	kfree(mode->name);
	kfree(mode->ll_table);
	kfree(mode->d_table);

	kfree(mode);
}

/*
 * IAA Compression modes are defined by an ll_table and a d_table.
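 * (In DEFLATE terms, the ll_table carries the Huffman codes for the
 * literal/length alphabet and the d_table the codes for the distance
 * alphabet; they are copied into the AECS ll_sym/d_sym fields when a
 * device compression mode is initialized below.)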
 * These tables are typically generated and captured using statistics
 * collected from running actual compress/decompress workloads.
 *
 * A module or other kernel code can add and remove compression modes
 * with a given name using the exported @add_iaa_compression_mode()
 * and @remove_iaa_compression_mode() functions.
 *
 * When a new compression mode is added, the tables are saved in a
 * global compression mode list.  When IAA devices are added, a
 * per-IAA device dma mapping is created for each IAA device, for each
 * compression mode.  These are the tables used to do the actual
 * compression/decompression and are unmapped if/when the devices are
 * removed.  Currently, compression modes must be added before any
 * device is added, and removed after all devices have been removed.
 */

/**
 * remove_iaa_compression_mode - Remove an IAA compression mode
 * @name: The name of the compression mode to remove
 *
 * Remove the IAA compression mode named @name.
 */
void remove_iaa_compression_mode(const char *name)
{
	struct iaa_compression_mode *mode;
	int idx;

	mutex_lock(&iaa_devices_lock);

	if (!list_empty(&iaa_devices))
		goto out;

	mode = find_iaa_compression_mode(name, &idx);
	if (mode) {
		free_iaa_compression_mode(mode);
		iaa_compression_modes[idx] = NULL;
	}
out:
	mutex_unlock(&iaa_devices_lock);
}
EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);

/**
 * add_iaa_compression_mode - Add an IAA compression mode
 * @name: The name the compression mode will be known as
 * @ll_table: The ll (literal/length) table
 * @ll_table_size: The ll table size in bytes
 * @d_table: The d (distance) table
 * @d_table_size: The d table size in bytes
 * @init: Optional callback function to init the compression mode data
 * @free: Optional callback function to free the compression mode data
 *
 * Add a new IAA compression mode named @name.
 *
 * Returns 0 if successful, errcode otherwise.
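 *
 * Illustrative call (a sketch only - "mymode", mymode_ll_table and
 * mymode_d_table are hypothetical u32 tables built from workload
 * statistics, not something this driver provides):
 *
 *	ret = add_iaa_compression_mode("mymode",
 *				       mymode_ll_table,
 *				       sizeof(mymode_ll_table),
 *				       mymode_d_table,
 *				       sizeof(mymode_d_table),
 *				       NULL, NULL);
 *	if (ret)
 *		return ret;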
322 */ 323 int add_iaa_compression_mode(const char *name, 324 const u32 *ll_table, 325 int ll_table_size, 326 const u32 *d_table, 327 int d_table_size, 328 iaa_dev_comp_init_fn_t init, 329 iaa_dev_comp_free_fn_t free) 330 { 331 struct iaa_compression_mode *mode; 332 int idx, ret = -ENOMEM; 333 334 mutex_lock(&iaa_devices_lock); 335 336 if (!list_empty(&iaa_devices)) { 337 ret = -EBUSY; 338 goto out; 339 } 340 341 mode = kzalloc(sizeof(*mode), GFP_KERNEL); 342 if (!mode) 343 goto out; 344 345 mode->name = kstrdup(name, GFP_KERNEL); 346 if (!mode->name) 347 goto free; 348 349 if (ll_table) { 350 mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL); 351 if (!mode->ll_table) 352 goto free; 353 mode->ll_table_size = ll_table_size; 354 } 355 356 if (d_table) { 357 mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL); 358 if (!mode->d_table) 359 goto free; 360 mode->d_table_size = d_table_size; 361 } 362 363 mode->init = init; 364 mode->free = free; 365 366 idx = find_empty_iaa_compression_mode(); 367 if (idx < 0) 368 goto free; 369 370 pr_debug("IAA compression mode %s added at idx %d\n", 371 mode->name, idx); 372 373 iaa_compression_modes[idx] = mode; 374 375 ret = 0; 376 out: 377 mutex_unlock(&iaa_devices_lock); 378 379 return ret; 380 free: 381 free_iaa_compression_mode(mode); 382 goto out; 383 } 384 EXPORT_SYMBOL_GPL(add_iaa_compression_mode); 385 386 static struct iaa_device_compression_mode * 387 get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx) 388 { 389 return iaa_device->compression_modes[idx]; 390 } 391 392 static void free_device_compression_mode(struct iaa_device *iaa_device, 393 struct iaa_device_compression_mode *device_mode) 394 { 395 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN; 396 struct device *dev = &iaa_device->idxd->pdev->dev; 397 398 kfree(device_mode->name); 399 400 if (device_mode->aecs_comp_table) 401 dma_free_coherent(dev, size, device_mode->aecs_comp_table, 402 device_mode->aecs_comp_table_dma_addr); 403 kfree(device_mode); 404 } 405 406 #define IDXD_OP_FLAG_AECS_RW_TGLS 0x400000 407 #define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC) 408 #define IAX_AECS_COMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS) 409 #define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS) 410 #define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \ 411 IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \ 412 IDXD_OP_FLAG_AECS_RW_TGLS) 413 414 static int check_completion(struct device *dev, 415 struct iax_completion_record *comp, 416 bool compress, 417 bool only_once); 418 419 static int init_device_compression_mode(struct iaa_device *iaa_device, 420 struct iaa_compression_mode *mode, 421 int idx, struct idxd_wq *wq) 422 { 423 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN; 424 struct device *dev = &iaa_device->idxd->pdev->dev; 425 struct iaa_device_compression_mode *device_mode; 426 int ret = -ENOMEM; 427 428 device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL); 429 if (!device_mode) 430 return -ENOMEM; 431 432 device_mode->name = kstrdup(mode->name, GFP_KERNEL); 433 if (!device_mode->name) 434 goto free; 435 436 device_mode->aecs_comp_table = dma_alloc_coherent(dev, size, 437 &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL); 438 if (!device_mode->aecs_comp_table) 439 goto free; 440 441 /* Add Huffman table to aecs */ 442 memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table)); 443 memcpy(device_mode->aecs_comp_table->ll_sym, 
mode->ll_table, mode->ll_table_size); 444 memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size); 445 446 if (mode->init) { 447 ret = mode->init(device_mode); 448 if (ret) 449 goto free; 450 } 451 452 /* mode index should match iaa_compression_modes idx */ 453 iaa_device->compression_modes[idx] = device_mode; 454 455 pr_debug("IAA %s compression mode initialized for iaa device %d\n", 456 mode->name, iaa_device->idxd->id); 457 458 ret = 0; 459 out: 460 return ret; 461 free: 462 pr_debug("IAA %s compression mode initialization failed for iaa device %d\n", 463 mode->name, iaa_device->idxd->id); 464 465 free_device_compression_mode(iaa_device, device_mode); 466 goto out; 467 } 468 469 static int init_device_compression_modes(struct iaa_device *iaa_device, 470 struct idxd_wq *wq) 471 { 472 struct iaa_compression_mode *mode; 473 int i, ret = 0; 474 475 for (i = 0; i < IAA_COMP_MODES_MAX; i++) { 476 mode = iaa_compression_modes[i]; 477 if (!mode) 478 continue; 479 480 ret = init_device_compression_mode(iaa_device, mode, i, wq); 481 if (ret) 482 break; 483 } 484 485 return ret; 486 } 487 488 static void remove_device_compression_modes(struct iaa_device *iaa_device) 489 { 490 struct iaa_device_compression_mode *device_mode; 491 int i; 492 493 for (i = 0; i < IAA_COMP_MODES_MAX; i++) { 494 device_mode = iaa_device->compression_modes[i]; 495 if (!device_mode) 496 continue; 497 498 if (iaa_compression_modes[i]->free) 499 iaa_compression_modes[i]->free(device_mode); 500 free_device_compression_mode(iaa_device, device_mode); 501 iaa_device->compression_modes[i] = NULL; 502 } 503 } 504 505 static struct iaa_device *iaa_device_alloc(void) 506 { 507 struct iaa_device *iaa_device; 508 509 iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL); 510 if (!iaa_device) 511 return NULL; 512 513 INIT_LIST_HEAD(&iaa_device->wqs); 514 515 return iaa_device; 516 } 517 518 static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq) 519 { 520 struct iaa_wq *iaa_wq; 521 522 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) { 523 if (iaa_wq->wq == wq) 524 return true; 525 } 526 527 return false; 528 } 529 530 static struct iaa_device *add_iaa_device(struct idxd_device *idxd) 531 { 532 struct iaa_device *iaa_device; 533 534 iaa_device = iaa_device_alloc(); 535 if (!iaa_device) 536 return NULL; 537 538 iaa_device->idxd = idxd; 539 540 list_add_tail(&iaa_device->list, &iaa_devices); 541 542 nr_iaa++; 543 544 return iaa_device; 545 } 546 547 static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq) 548 { 549 int ret = 0; 550 551 ret = init_device_compression_modes(iaa_device, iaa_wq->wq); 552 if (ret) 553 return ret; 554 555 return ret; 556 } 557 558 static void del_iaa_device(struct iaa_device *iaa_device) 559 { 560 list_del(&iaa_device->list); 561 562 nr_iaa--; 563 } 564 565 static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq, 566 struct iaa_wq **new_wq) 567 { 568 struct idxd_device *idxd = iaa_device->idxd; 569 struct pci_dev *pdev = idxd->pdev; 570 struct device *dev = &pdev->dev; 571 struct iaa_wq *iaa_wq; 572 573 iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL); 574 if (!iaa_wq) 575 return -ENOMEM; 576 577 iaa_wq->wq = wq; 578 iaa_wq->iaa_device = iaa_device; 579 idxd_wq_set_private(wq, iaa_wq); 580 581 list_add_tail(&iaa_wq->list, &iaa_device->wqs); 582 583 iaa_device->n_wq++; 584 585 if (new_wq) 586 *new_wq = iaa_wq; 587 588 dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n", 589 wq->id, iaa_device->idxd->id, iaa_device->n_wq); 590 591 
return 0; 592 } 593 594 static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq) 595 { 596 struct idxd_device *idxd = iaa_device->idxd; 597 struct pci_dev *pdev = idxd->pdev; 598 struct device *dev = &pdev->dev; 599 struct iaa_wq *iaa_wq; 600 601 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) { 602 if (iaa_wq->wq == wq) { 603 list_del(&iaa_wq->list); 604 iaa_device->n_wq--; 605 606 dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n", 607 wq->id, iaa_device->idxd->id, 608 iaa_device->n_wq, nr_iaa); 609 610 if (iaa_device->n_wq == 0) 611 del_iaa_device(iaa_device); 612 break; 613 } 614 } 615 } 616 617 static void clear_wq_table(void) 618 { 619 int cpu; 620 621 for (cpu = 0; cpu < nr_cpus; cpu++) 622 wq_table_clear_entry(cpu); 623 624 pr_debug("cleared wq table\n"); 625 } 626 627 static void free_iaa_device(struct iaa_device *iaa_device) 628 { 629 if (!iaa_device) 630 return; 631 632 remove_device_compression_modes(iaa_device); 633 kfree(iaa_device); 634 } 635 636 static void __free_iaa_wq(struct iaa_wq *iaa_wq) 637 { 638 struct iaa_device *iaa_device; 639 640 if (!iaa_wq) 641 return; 642 643 iaa_device = iaa_wq->iaa_device; 644 if (iaa_device->n_wq == 0) 645 free_iaa_device(iaa_wq->iaa_device); 646 } 647 648 static void free_iaa_wq(struct iaa_wq *iaa_wq) 649 { 650 struct idxd_wq *wq; 651 652 __free_iaa_wq(iaa_wq); 653 654 wq = iaa_wq->wq; 655 656 kfree(iaa_wq); 657 idxd_wq_set_private(wq, NULL); 658 } 659 660 static int iaa_wq_get(struct idxd_wq *wq) 661 { 662 struct idxd_device *idxd = wq->idxd; 663 struct iaa_wq *iaa_wq; 664 int ret = 0; 665 666 spin_lock(&idxd->dev_lock); 667 iaa_wq = idxd_wq_get_private(wq); 668 if (iaa_wq && !iaa_wq->remove) { 669 iaa_wq->ref++; 670 idxd_wq_get(wq); 671 } else { 672 ret = -ENODEV; 673 } 674 spin_unlock(&idxd->dev_lock); 675 676 return ret; 677 } 678 679 static int iaa_wq_put(struct idxd_wq *wq) 680 { 681 struct idxd_device *idxd = wq->idxd; 682 struct iaa_wq *iaa_wq; 683 bool free = false; 684 int ret = 0; 685 686 spin_lock(&idxd->dev_lock); 687 iaa_wq = idxd_wq_get_private(wq); 688 if (iaa_wq) { 689 iaa_wq->ref--; 690 if (iaa_wq->ref == 0 && iaa_wq->remove) { 691 idxd_wq_set_private(wq, NULL); 692 free = true; 693 } 694 idxd_wq_put(wq); 695 } else { 696 ret = -ENODEV; 697 } 698 spin_unlock(&idxd->dev_lock); 699 if (free) { 700 __free_iaa_wq(iaa_wq); 701 kfree(iaa_wq); 702 } 703 704 return ret; 705 } 706 707 static void free_wq_table(void) 708 { 709 int cpu; 710 711 for (cpu = 0; cpu < nr_cpus; cpu++) 712 wq_table_free_entry(cpu); 713 714 free_percpu(wq_table); 715 716 pr_debug("freed wq table\n"); 717 } 718 719 static int alloc_wq_table(int max_wqs) 720 { 721 struct wq_table_entry *entry; 722 int cpu; 723 724 wq_table = alloc_percpu(struct wq_table_entry); 725 if (!wq_table) 726 return -ENOMEM; 727 728 for (cpu = 0; cpu < nr_cpus; cpu++) { 729 entry = per_cpu_ptr(wq_table, cpu); 730 entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL); 731 if (!entry->wqs) { 732 free_wq_table(); 733 return -ENOMEM; 734 } 735 736 entry->max_wqs = max_wqs; 737 } 738 739 pr_debug("initialized wq table\n"); 740 741 return 0; 742 } 743 744 static int save_iaa_wq(struct idxd_wq *wq) 745 { 746 struct iaa_device *iaa_device, *found = NULL; 747 struct idxd_device *idxd; 748 struct pci_dev *pdev; 749 struct device *dev; 750 int ret = 0; 751 752 list_for_each_entry(iaa_device, &iaa_devices, list) { 753 if (iaa_device->idxd == wq->idxd) { 754 idxd = iaa_device->idxd; 755 pdev = idxd->pdev; 756 dev = &pdev->dev; 757 /* 758 * Check 
to see that we don't already have this wq. 759 * Shouldn't happen but we don't control probing. 760 */ 761 if (iaa_has_wq(iaa_device, wq)) { 762 dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n", 763 iaa_device); 764 goto out; 765 } 766 767 found = iaa_device; 768 769 ret = add_iaa_wq(iaa_device, wq, NULL); 770 if (ret) 771 goto out; 772 773 break; 774 } 775 } 776 777 if (!found) { 778 struct iaa_device *new_device; 779 struct iaa_wq *new_wq; 780 781 new_device = add_iaa_device(wq->idxd); 782 if (!new_device) { 783 ret = -ENOMEM; 784 goto out; 785 } 786 787 ret = add_iaa_wq(new_device, wq, &new_wq); 788 if (ret) { 789 del_iaa_device(new_device); 790 free_iaa_device(new_device); 791 goto out; 792 } 793 794 ret = init_iaa_device(new_device, new_wq); 795 if (ret) { 796 del_iaa_wq(new_device, new_wq->wq); 797 del_iaa_device(new_device); 798 free_iaa_wq(new_wq); 799 goto out; 800 } 801 } 802 803 if (WARN_ON(nr_iaa == 0)) 804 return -EINVAL; 805 806 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; 807 if (!cpus_per_iaa) 808 cpus_per_iaa = 1; 809 out: 810 return 0; 811 } 812 813 static void remove_iaa_wq(struct idxd_wq *wq) 814 { 815 struct iaa_device *iaa_device; 816 817 list_for_each_entry(iaa_device, &iaa_devices, list) { 818 if (iaa_has_wq(iaa_device, wq)) { 819 del_iaa_wq(iaa_device, wq); 820 break; 821 } 822 } 823 824 if (nr_iaa) { 825 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; 826 if (!cpus_per_iaa) 827 cpus_per_iaa = 1; 828 } else 829 cpus_per_iaa = 1; 830 } 831 832 static int wq_table_add_wqs(int iaa, int cpu) 833 { 834 struct iaa_device *iaa_device, *found_device = NULL; 835 int ret = 0, cur_iaa = 0, n_wqs_added = 0; 836 struct idxd_device *idxd; 837 struct iaa_wq *iaa_wq; 838 struct pci_dev *pdev; 839 struct device *dev; 840 841 list_for_each_entry(iaa_device, &iaa_devices, list) { 842 idxd = iaa_device->idxd; 843 pdev = idxd->pdev; 844 dev = &pdev->dev; 845 846 if (cur_iaa != iaa) { 847 cur_iaa++; 848 continue; 849 } 850 851 found_device = iaa_device; 852 dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n", 853 found_device->idxd->id, cur_iaa); 854 break; 855 } 856 857 if (!found_device) { 858 found_device = list_first_entry_or_null(&iaa_devices, 859 struct iaa_device, list); 860 if (!found_device) { 861 pr_debug("couldn't find any iaa devices with wqs!\n"); 862 ret = -EINVAL; 863 goto out; 864 } 865 cur_iaa = 0; 866 867 idxd = found_device->idxd; 868 pdev = idxd->pdev; 869 dev = &pdev->dev; 870 dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n", 871 found_device->idxd->id, cur_iaa); 872 } 873 874 list_for_each_entry(iaa_wq, &found_device->wqs, list) { 875 wq_table_add(cpu, iaa_wq->wq); 876 pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n", 877 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id); 878 n_wqs_added++; 879 } 880 881 if (!n_wqs_added) { 882 pr_debug("couldn't find any iaa wqs!\n"); 883 ret = -EINVAL; 884 goto out; 885 } 886 out: 887 return ret; 888 } 889 890 /* 891 * Rebalance the wq table so that given a cpu, it's easy to find the 892 * closest IAA instance. The idea is to try to choose the most 893 * appropriate IAA instance for a caller and spread available 894 * workqueues around to clients. 
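 *
 * Concretely, the CPUs of each node are split into groups of
 * cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa consecutive
 * CPUs, and every CPU in a group gets the wqs of one IAA instance.
 * For example, with a hypothetical topology of 2 nodes, 56 CPUs per
 * node and 8 IAA instances, cpus_per_iaa = 112 / 8 = 14.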
895 */ 896 static void rebalance_wq_table(void) 897 { 898 const struct cpumask *node_cpus; 899 int node, cpu, iaa = -1; 900 901 if (nr_iaa == 0) 902 return; 903 904 pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n", 905 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa); 906 907 clear_wq_table(); 908 909 if (nr_iaa == 1) { 910 for (cpu = 0; cpu < nr_cpus; cpu++) { 911 if (WARN_ON(wq_table_add_wqs(0, cpu))) { 912 pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu); 913 return; 914 } 915 } 916 917 return; 918 } 919 920 for_each_node_with_cpus(node) { 921 node_cpus = cpumask_of_node(node); 922 923 for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) { 924 int node_cpu = cpumask_nth(cpu, node_cpus); 925 926 if (WARN_ON(node_cpu >= nr_cpu_ids)) { 927 pr_debug("node_cpu %d doesn't exist!\n", node_cpu); 928 return; 929 } 930 931 if ((cpu % cpus_per_iaa) == 0) 932 iaa++; 933 934 if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) { 935 pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu); 936 return; 937 } 938 } 939 } 940 } 941 942 static inline int check_completion(struct device *dev, 943 struct iax_completion_record *comp, 944 bool compress, 945 bool only_once) 946 { 947 char *op_str = compress ? "compress" : "decompress"; 948 int status_checks = 0; 949 int ret = 0; 950 951 while (!comp->status) { 952 if (only_once) 953 return -EAGAIN; 954 cpu_relax(); 955 if (status_checks++ >= IAA_COMPLETION_TIMEOUT) { 956 /* Something is wrong with the hw, disable it. */ 957 dev_err(dev, "%s completion timed out - " 958 "assuming broken hw, iaa_crypto now DISABLED\n", 959 op_str); 960 iaa_crypto_enabled = false; 961 ret = -ETIMEDOUT; 962 goto out; 963 } 964 } 965 966 if (comp->status != IAX_COMP_SUCCESS) { 967 if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) { 968 ret = -ETIMEDOUT; 969 dev_dbg(dev, "%s timed out, size=0x%x\n", 970 op_str, comp->output_size); 971 update_completion_timeout_errs(); 972 goto out; 973 } 974 975 if (comp->status == IAA_ANALYTICS_ERROR && 976 comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) { 977 ret = -E2BIG; 978 dev_dbg(dev, "compressed > uncompressed size," 979 " not compressing, size=0x%x\n", 980 comp->output_size); 981 update_completion_comp_buf_overflow_errs(); 982 goto out; 983 } 984 985 if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) { 986 ret = -EOVERFLOW; 987 goto out; 988 } 989 990 ret = -EINVAL; 991 dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n", 992 op_str, comp->status, comp->error_code, comp->output_size); 993 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0); 994 update_completion_einval_errs(); 995 996 goto out; 997 } 998 out: 999 return ret; 1000 } 1001 1002 static int deflate_generic_decompress(struct acomp_req *req) 1003 { 1004 void *src, *dst; 1005 int ret; 1006 1007 src = kmap_local_page(sg_page(req->src)) + req->src->offset; 1008 dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset; 1009 1010 ret = crypto_comp_decompress(deflate_generic_tfm, 1011 src, req->slen, dst, &req->dlen); 1012 1013 kunmap_local(src); 1014 kunmap_local(dst); 1015 1016 update_total_sw_decomp_calls(); 1017 1018 return ret; 1019 } 1020 1021 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, 1022 struct acomp_req *req, 1023 dma_addr_t *src_addr, dma_addr_t *dst_addr); 1024 1025 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, 1026 struct idxd_wq *wq, 1027 dma_addr_t src_addr, unsigned int slen, 1028 dma_addr_t dst_addr, unsigned int *dlen, 
1029 u32 compression_crc); 1030 1031 static void iaa_desc_complete(struct idxd_desc *idxd_desc, 1032 enum idxd_complete_type comp_type, 1033 bool free_desc, void *__ctx, 1034 u32 *status) 1035 { 1036 struct iaa_device_compression_mode *active_compression_mode; 1037 struct iaa_compression_ctx *compression_ctx; 1038 struct crypto_ctx *ctx = __ctx; 1039 struct iaa_device *iaa_device; 1040 struct idxd_device *idxd; 1041 struct iaa_wq *iaa_wq; 1042 struct pci_dev *pdev; 1043 struct device *dev; 1044 int ret, err = 0; 1045 1046 compression_ctx = crypto_tfm_ctx(ctx->tfm); 1047 1048 iaa_wq = idxd_wq_get_private(idxd_desc->wq); 1049 iaa_device = iaa_wq->iaa_device; 1050 idxd = iaa_device->idxd; 1051 pdev = idxd->pdev; 1052 dev = &pdev->dev; 1053 1054 active_compression_mode = get_iaa_device_compression_mode(iaa_device, 1055 compression_ctx->mode); 1056 dev_dbg(dev, "%s: compression mode %s," 1057 " ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__, 1058 active_compression_mode->name, 1059 ctx->src_addr, ctx->dst_addr); 1060 1061 ret = check_completion(dev, idxd_desc->iax_completion, 1062 ctx->compress, false); 1063 if (ret) { 1064 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret); 1065 if (!ctx->compress && 1066 idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) { 1067 pr_warn("%s: falling back to deflate-generic decompress, " 1068 "analytics error code %x\n", __func__, 1069 idxd_desc->iax_completion->error_code); 1070 ret = deflate_generic_decompress(ctx->req); 1071 if (ret) { 1072 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n", 1073 __func__, ret); 1074 err = -EIO; 1075 goto err; 1076 } 1077 } else { 1078 err = -EIO; 1079 goto err; 1080 } 1081 } else { 1082 ctx->req->dlen = idxd_desc->iax_completion->output_size; 1083 } 1084 1085 /* Update stats */ 1086 if (ctx->compress) { 1087 update_total_comp_bytes_out(ctx->req->dlen); 1088 update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen); 1089 } else { 1090 update_total_decomp_bytes_in(ctx->req->slen); 1091 update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen); 1092 } 1093 1094 if (ctx->compress && compression_ctx->verify_compress) { 1095 dma_addr_t src_addr, dst_addr; 1096 u32 compression_crc; 1097 1098 compression_crc = idxd_desc->iax_completion->crc; 1099 1100 ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr); 1101 if (ret) { 1102 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret); 1103 err = -EIO; 1104 goto out; 1105 } 1106 1107 ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr, 1108 ctx->req->slen, dst_addr, &ctx->req->dlen, 1109 compression_crc); 1110 if (ret) { 1111 dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret); 1112 err = -EIO; 1113 } 1114 1115 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE); 1116 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE); 1117 1118 goto out; 1119 } 1120 err: 1121 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE); 1122 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE); 1123 out: 1124 if (ret != 0) 1125 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret); 1126 1127 if (ctx->req->base.complete) 1128 acomp_request_complete(ctx->req, err); 1129 1130 if (free_desc) 1131 idxd_free_desc(idxd_desc->wq, idxd_desc); 1132 iaa_wq_put(idxd_desc->wq); 1133 } 1134 1135 static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, 1136 struct idxd_wq *wq, 1137 dma_addr_t src_addr, unsigned int slen, 1138 dma_addr_t 
dst_addr, unsigned int *dlen, 1139 u32 *compression_crc) 1140 { 1141 struct iaa_device_compression_mode *active_compression_mode; 1142 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); 1143 struct iaa_device *iaa_device; 1144 struct idxd_desc *idxd_desc; 1145 struct iax_hw_desc *desc; 1146 struct idxd_device *idxd; 1147 struct iaa_wq *iaa_wq; 1148 struct pci_dev *pdev; 1149 struct device *dev; 1150 int ret = 0; 1151 1152 iaa_wq = idxd_wq_get_private(wq); 1153 iaa_device = iaa_wq->iaa_device; 1154 idxd = iaa_device->idxd; 1155 pdev = idxd->pdev; 1156 dev = &pdev->dev; 1157 1158 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); 1159 1160 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); 1161 if (IS_ERR(idxd_desc)) { 1162 dev_dbg(dev, "idxd descriptor allocation failed\n"); 1163 dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc)); 1164 return PTR_ERR(idxd_desc); 1165 } 1166 desc = idxd_desc->iax_hw; 1167 1168 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | 1169 IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC; 1170 desc->opcode = IAX_OPCODE_COMPRESS; 1171 desc->compr_flags = IAA_COMP_FLAGS; 1172 desc->priv = 0; 1173 1174 desc->src1_addr = (u64)src_addr; 1175 desc->src1_size = slen; 1176 desc->dst_addr = (u64)dst_addr; 1177 desc->max_dst_size = *dlen; 1178 desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr; 1179 desc->src2_size = sizeof(struct aecs_comp_table_record); 1180 desc->completion_addr = idxd_desc->compl_dma; 1181 1182 if (ctx->use_irq) { 1183 desc->flags |= IDXD_OP_FLAG_RCI; 1184 1185 idxd_desc->crypto.req = req; 1186 idxd_desc->crypto.tfm = tfm; 1187 idxd_desc->crypto.src_addr = src_addr; 1188 idxd_desc->crypto.dst_addr = dst_addr; 1189 idxd_desc->crypto.compress = true; 1190 1191 dev_dbg(dev, "%s use_async_irq: compression mode %s," 1192 " src_addr %llx, dst_addr %llx\n", __func__, 1193 active_compression_mode->name, 1194 src_addr, dst_addr); 1195 } else if (ctx->async_mode) 1196 req->base.data = idxd_desc; 1197 1198 dev_dbg(dev, "%s: compression mode %s," 1199 " desc->src1_addr %llx, desc->src1_size %d," 1200 " desc->dst_addr %llx, desc->max_dst_size %d," 1201 " desc->src2_addr %llx, desc->src2_size %d\n", __func__, 1202 active_compression_mode->name, 1203 desc->src1_addr, desc->src1_size, desc->dst_addr, 1204 desc->max_dst_size, desc->src2_addr, desc->src2_size); 1205 1206 ret = idxd_submit_desc(wq, idxd_desc); 1207 if (ret) { 1208 dev_dbg(dev, "submit_desc failed ret=%d\n", ret); 1209 goto err; 1210 } 1211 1212 /* Update stats */ 1213 update_total_comp_calls(); 1214 update_wq_comp_calls(wq); 1215 1216 if (ctx->async_mode) { 1217 ret = -EINPROGRESS; 1218 dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__); 1219 goto out; 1220 } 1221 1222 ret = check_completion(dev, idxd_desc->iax_completion, true, false); 1223 if (ret) { 1224 dev_dbg(dev, "check_completion failed ret=%d\n", ret); 1225 goto err; 1226 } 1227 1228 *dlen = idxd_desc->iax_completion->output_size; 1229 1230 /* Update stats */ 1231 update_total_comp_bytes_out(*dlen); 1232 update_wq_comp_bytes(wq, *dlen); 1233 1234 *compression_crc = idxd_desc->iax_completion->crc; 1235 1236 if (!ctx->async_mode) 1237 idxd_free_desc(wq, idxd_desc); 1238 out: 1239 return ret; 1240 err: 1241 idxd_free_desc(wq, idxd_desc); 1242 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret); 1243 1244 goto out; 1245 } 1246 1247 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, 1248 struct acomp_req *req, 1249 dma_addr_t *src_addr, dma_addr_t *dst_addr) 1250 { 
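	/*
	 * The verify pass reuses the buffers of the compress that just
	 * completed, but runs a decompress with the roles reversed: the
	 * compressed output becomes the source and the original source
	 * becomes the (suppressed) destination.  Unmap both scatterlists
	 * and remap them with the DMA directions swapped accordingly.
	 */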
1251 int ret = 0; 1252 int nr_sgs; 1253 1254 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); 1255 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); 1256 1257 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); 1258 if (nr_sgs <= 0 || nr_sgs > 1) { 1259 dev_dbg(dev, "verify: couldn't map src sg for iaa device %d," 1260 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, 1261 iaa_wq->wq->id, ret); 1262 ret = -EIO; 1263 goto out; 1264 } 1265 *src_addr = sg_dma_address(req->src); 1266 dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," 1267 " req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs, 1268 req->src, req->slen, sg_dma_len(req->src)); 1269 1270 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE); 1271 if (nr_sgs <= 0 || nr_sgs > 1) { 1272 dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d," 1273 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, 1274 iaa_wq->wq->id, ret); 1275 ret = -EIO; 1276 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); 1277 goto out; 1278 } 1279 *dst_addr = sg_dma_address(req->dst); 1280 dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," 1281 " req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs, 1282 req->dst, req->dlen, sg_dma_len(req->dst)); 1283 out: 1284 return ret; 1285 } 1286 1287 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, 1288 struct idxd_wq *wq, 1289 dma_addr_t src_addr, unsigned int slen, 1290 dma_addr_t dst_addr, unsigned int *dlen, 1291 u32 compression_crc) 1292 { 1293 struct iaa_device_compression_mode *active_compression_mode; 1294 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); 1295 struct iaa_device *iaa_device; 1296 struct idxd_desc *idxd_desc; 1297 struct iax_hw_desc *desc; 1298 struct idxd_device *idxd; 1299 struct iaa_wq *iaa_wq; 1300 struct pci_dev *pdev; 1301 struct device *dev; 1302 int ret = 0; 1303 1304 iaa_wq = idxd_wq_get_private(wq); 1305 iaa_device = iaa_wq->iaa_device; 1306 idxd = iaa_device->idxd; 1307 pdev = idxd->pdev; 1308 dev = &pdev->dev; 1309 1310 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); 1311 1312 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); 1313 if (IS_ERR(idxd_desc)) { 1314 dev_dbg(dev, "idxd descriptor allocation failed\n"); 1315 dev_dbg(dev, "iaa compress failed: ret=%ld\n", 1316 PTR_ERR(idxd_desc)); 1317 return PTR_ERR(idxd_desc); 1318 } 1319 desc = idxd_desc->iax_hw; 1320 1321 /* Verify (optional) - decompress and check crc, suppress dest write */ 1322 1323 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC; 1324 desc->opcode = IAX_OPCODE_DECOMPRESS; 1325 desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT; 1326 desc->priv = 0; 1327 1328 desc->src1_addr = (u64)dst_addr; 1329 desc->src1_size = *dlen; 1330 desc->dst_addr = (u64)src_addr; 1331 desc->max_dst_size = slen; 1332 desc->completion_addr = idxd_desc->compl_dma; 1333 1334 dev_dbg(dev, "(verify) compression mode %s," 1335 " desc->src1_addr %llx, desc->src1_size %d," 1336 " desc->dst_addr %llx, desc->max_dst_size %d," 1337 " desc->src2_addr %llx, desc->src2_size %d\n", 1338 active_compression_mode->name, 1339 desc->src1_addr, desc->src1_size, desc->dst_addr, 1340 desc->max_dst_size, desc->src2_addr, desc->src2_size); 1341 1342 ret = idxd_submit_desc(wq, idxd_desc); 1343 if (ret) { 1344 dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret); 1345 goto err; 1346 } 1347 1348 ret = check_completion(dev, 
idxd_desc->iax_completion, false, false); 1349 if (ret) { 1350 dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret); 1351 goto err; 1352 } 1353 1354 if (compression_crc != idxd_desc->iax_completion->crc) { 1355 ret = -EINVAL; 1356 dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:" 1357 " comp=0x%x, decomp=0x%x\n", compression_crc, 1358 idxd_desc->iax_completion->crc); 1359 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 1360 8, 1, idxd_desc->iax_completion, 64, 0); 1361 goto err; 1362 } 1363 1364 idxd_free_desc(wq, idxd_desc); 1365 out: 1366 return ret; 1367 err: 1368 idxd_free_desc(wq, idxd_desc); 1369 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret); 1370 1371 goto out; 1372 } 1373 1374 static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, 1375 struct idxd_wq *wq, 1376 dma_addr_t src_addr, unsigned int slen, 1377 dma_addr_t dst_addr, unsigned int *dlen, 1378 bool disable_async) 1379 { 1380 struct iaa_device_compression_mode *active_compression_mode; 1381 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); 1382 struct iaa_device *iaa_device; 1383 struct idxd_desc *idxd_desc; 1384 struct iax_hw_desc *desc; 1385 struct idxd_device *idxd; 1386 struct iaa_wq *iaa_wq; 1387 struct pci_dev *pdev; 1388 struct device *dev; 1389 int ret = 0; 1390 1391 iaa_wq = idxd_wq_get_private(wq); 1392 iaa_device = iaa_wq->iaa_device; 1393 idxd = iaa_device->idxd; 1394 pdev = idxd->pdev; 1395 dev = &pdev->dev; 1396 1397 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); 1398 1399 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); 1400 if (IS_ERR(idxd_desc)) { 1401 dev_dbg(dev, "idxd descriptor allocation failed\n"); 1402 dev_dbg(dev, "iaa decompress failed: ret=%ld\n", 1403 PTR_ERR(idxd_desc)); 1404 return PTR_ERR(idxd_desc); 1405 } 1406 desc = idxd_desc->iax_hw; 1407 1408 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC; 1409 desc->opcode = IAX_OPCODE_DECOMPRESS; 1410 desc->max_dst_size = PAGE_SIZE; 1411 desc->decompr_flags = IAA_DECOMP_FLAGS; 1412 desc->priv = 0; 1413 1414 desc->src1_addr = (u64)src_addr; 1415 desc->dst_addr = (u64)dst_addr; 1416 desc->max_dst_size = *dlen; 1417 desc->src1_size = slen; 1418 desc->completion_addr = idxd_desc->compl_dma; 1419 1420 if (ctx->use_irq && !disable_async) { 1421 desc->flags |= IDXD_OP_FLAG_RCI; 1422 1423 idxd_desc->crypto.req = req; 1424 idxd_desc->crypto.tfm = tfm; 1425 idxd_desc->crypto.src_addr = src_addr; 1426 idxd_desc->crypto.dst_addr = dst_addr; 1427 idxd_desc->crypto.compress = false; 1428 1429 dev_dbg(dev, "%s: use_async_irq compression mode %s," 1430 " src_addr %llx, dst_addr %llx\n", __func__, 1431 active_compression_mode->name, 1432 src_addr, dst_addr); 1433 } else if (ctx->async_mode && !disable_async) 1434 req->base.data = idxd_desc; 1435 1436 dev_dbg(dev, "%s: decompression mode %s," 1437 " desc->src1_addr %llx, desc->src1_size %d," 1438 " desc->dst_addr %llx, desc->max_dst_size %d," 1439 " desc->src2_addr %llx, desc->src2_size %d\n", __func__, 1440 active_compression_mode->name, 1441 desc->src1_addr, desc->src1_size, desc->dst_addr, 1442 desc->max_dst_size, desc->src2_addr, desc->src2_size); 1443 1444 ret = idxd_submit_desc(wq, idxd_desc); 1445 if (ret) { 1446 dev_dbg(dev, "submit_desc failed ret=%d\n", ret); 1447 goto err; 1448 } 1449 1450 /* Update stats */ 1451 update_total_decomp_calls(); 1452 update_wq_decomp_calls(wq); 1453 1454 if (ctx->async_mode && !disable_async) { 1455 ret = -EINPROGRESS; 1456 dev_dbg(dev, "%s: returning -EINPROGRESS\n", 
__func__); 1457 goto out; 1458 } 1459 1460 ret = check_completion(dev, idxd_desc->iax_completion, false, false); 1461 if (ret) { 1462 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret); 1463 if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) { 1464 pr_warn("%s: falling back to deflate-generic decompress, " 1465 "analytics error code %x\n", __func__, 1466 idxd_desc->iax_completion->error_code); 1467 ret = deflate_generic_decompress(req); 1468 if (ret) { 1469 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n", 1470 __func__, ret); 1471 goto err; 1472 } 1473 } else { 1474 goto err; 1475 } 1476 } else { 1477 req->dlen = idxd_desc->iax_completion->output_size; 1478 } 1479 1480 *dlen = req->dlen; 1481 1482 if (!ctx->async_mode || disable_async) 1483 idxd_free_desc(wq, idxd_desc); 1484 1485 /* Update stats */ 1486 update_total_decomp_bytes_in(slen); 1487 update_wq_decomp_bytes(wq, slen); 1488 out: 1489 return ret; 1490 err: 1491 idxd_free_desc(wq, idxd_desc); 1492 dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret); 1493 1494 goto out; 1495 } 1496 1497 static int iaa_comp_acompress(struct acomp_req *req) 1498 { 1499 struct iaa_compression_ctx *compression_ctx; 1500 struct crypto_tfm *tfm = req->base.tfm; 1501 dma_addr_t src_addr, dst_addr; 1502 int nr_sgs, cpu, ret = 0; 1503 struct iaa_wq *iaa_wq; 1504 u32 compression_crc; 1505 struct idxd_wq *wq; 1506 struct device *dev; 1507 1508 compression_ctx = crypto_tfm_ctx(tfm); 1509 1510 if (!iaa_crypto_enabled) { 1511 pr_debug("iaa_crypto disabled, not compressing\n"); 1512 return -ENODEV; 1513 } 1514 1515 if (!req->src || !req->slen) { 1516 pr_debug("invalid src, not compressing\n"); 1517 return -EINVAL; 1518 } 1519 1520 cpu = get_cpu(); 1521 wq = wq_table_next_wq(cpu); 1522 put_cpu(); 1523 if (!wq) { 1524 pr_debug("no wq configured for cpu=%d\n", cpu); 1525 return -ENODEV; 1526 } 1527 1528 ret = iaa_wq_get(wq); 1529 if (ret) { 1530 pr_debug("no wq available for cpu=%d\n", cpu); 1531 return -ENODEV; 1532 } 1533 1534 iaa_wq = idxd_wq_get_private(wq); 1535 1536 dev = &wq->idxd->pdev->dev; 1537 1538 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); 1539 if (nr_sgs <= 0 || nr_sgs > 1) { 1540 dev_dbg(dev, "couldn't map src sg for iaa device %d," 1541 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, 1542 iaa_wq->wq->id, ret); 1543 ret = -EIO; 1544 goto out; 1545 } 1546 src_addr = sg_dma_address(req->src); 1547 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," 1548 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs, 1549 req->src, req->slen, sg_dma_len(req->src)); 1550 1551 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); 1552 if (nr_sgs <= 0 || nr_sgs > 1) { 1553 dev_dbg(dev, "couldn't map dst sg for iaa device %d," 1554 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, 1555 iaa_wq->wq->id, ret); 1556 ret = -EIO; 1557 goto err_map_dst; 1558 } 1559 dst_addr = sg_dma_address(req->dst); 1560 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," 1561 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, 1562 req->dst, req->dlen, sg_dma_len(req->dst)); 1563 1564 ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr, 1565 &req->dlen, &compression_crc); 1566 if (ret == -EINPROGRESS) 1567 return ret; 1568 1569 if (!ret && compression_ctx->verify_compress) { 1570 ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr); 1571 if (ret) { 1572 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret); 1573 goto out; 1574 } 1575 
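		/*
		 * iaa_remap_for_verify() above swapped the DMA directions
		 * of req->src and req->dst, so the verify descriptor can
		 * decompress the freshly written output and compare its
		 * crc with the one saved from the compress; the unmap
		 * directions below mirror that swap.
		 */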
1576 ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen, 1577 dst_addr, &req->dlen, compression_crc); 1578 if (ret) 1579 dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret); 1580 1581 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE); 1582 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); 1583 1584 goto out; 1585 } 1586 1587 if (ret) 1588 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret); 1589 1590 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); 1591 err_map_dst: 1592 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); 1593 out: 1594 iaa_wq_put(wq); 1595 1596 return ret; 1597 } 1598 1599 static int iaa_comp_adecompress(struct acomp_req *req) 1600 { 1601 struct crypto_tfm *tfm = req->base.tfm; 1602 dma_addr_t src_addr, dst_addr; 1603 int nr_sgs, cpu, ret = 0; 1604 struct iaa_wq *iaa_wq; 1605 struct device *dev; 1606 struct idxd_wq *wq; 1607 1608 if (!iaa_crypto_enabled) { 1609 pr_debug("iaa_crypto disabled, not decompressing\n"); 1610 return -ENODEV; 1611 } 1612 1613 if (!req->src || !req->slen) { 1614 pr_debug("invalid src, not decompressing\n"); 1615 return -EINVAL; 1616 } 1617 1618 cpu = get_cpu(); 1619 wq = wq_table_next_wq(cpu); 1620 put_cpu(); 1621 if (!wq) { 1622 pr_debug("no wq configured for cpu=%d\n", cpu); 1623 return -ENODEV; 1624 } 1625 1626 ret = iaa_wq_get(wq); 1627 if (ret) { 1628 pr_debug("no wq available for cpu=%d\n", cpu); 1629 return -ENODEV; 1630 } 1631 1632 iaa_wq = idxd_wq_get_private(wq); 1633 1634 dev = &wq->idxd->pdev->dev; 1635 1636 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); 1637 if (nr_sgs <= 0 || nr_sgs > 1) { 1638 dev_dbg(dev, "couldn't map src sg for iaa device %d," 1639 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, 1640 iaa_wq->wq->id, ret); 1641 ret = -EIO; 1642 goto out; 1643 } 1644 src_addr = sg_dma_address(req->src); 1645 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," 1646 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs, 1647 req->src, req->slen, sg_dma_len(req->src)); 1648 1649 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); 1650 if (nr_sgs <= 0 || nr_sgs > 1) { 1651 dev_dbg(dev, "couldn't map dst sg for iaa device %d," 1652 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, 1653 iaa_wq->wq->id, ret); 1654 ret = -EIO; 1655 goto err_map_dst; 1656 } 1657 dst_addr = sg_dma_address(req->dst); 1658 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," 1659 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, 1660 req->dst, req->dlen, sg_dma_len(req->dst)); 1661 1662 ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, 1663 dst_addr, &req->dlen, false); 1664 if (ret == -EINPROGRESS) 1665 return ret; 1666 1667 if (ret != 0) 1668 dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret); 1669 1670 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); 1671 err_map_dst: 1672 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); 1673 out: 1674 iaa_wq_put(wq); 1675 1676 return ret; 1677 } 1678 1679 static void compression_ctx_init(struct iaa_compression_ctx *ctx) 1680 { 1681 ctx->verify_compress = iaa_verify_compress; 1682 ctx->async_mode = async_mode; 1683 ctx->use_irq = use_irq; 1684 } 1685 1686 static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm) 1687 { 1688 struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); 1689 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); 1690 1691 compression_ctx_init(ctx); 1692 1693 ctx->mode = 
IAA_MODE_FIXED; 1694 1695 return 0; 1696 } 1697 1698 static struct acomp_alg iaa_acomp_fixed_deflate = { 1699 .init = iaa_comp_init_fixed, 1700 .compress = iaa_comp_acompress, 1701 .decompress = iaa_comp_adecompress, 1702 .base = { 1703 .cra_name = "deflate", 1704 .cra_driver_name = "deflate-iaa", 1705 .cra_flags = CRYPTO_ALG_ASYNC, 1706 .cra_ctxsize = sizeof(struct iaa_compression_ctx), 1707 .cra_module = THIS_MODULE, 1708 .cra_priority = IAA_ALG_PRIORITY, 1709 } 1710 }; 1711 1712 static int iaa_register_compression_device(void) 1713 { 1714 int ret; 1715 1716 ret = crypto_register_acomp(&iaa_acomp_fixed_deflate); 1717 if (ret) { 1718 pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret); 1719 goto out; 1720 } 1721 1722 iaa_crypto_registered = true; 1723 out: 1724 return ret; 1725 } 1726 1727 static int iaa_unregister_compression_device(void) 1728 { 1729 if (iaa_crypto_registered) 1730 crypto_unregister_acomp(&iaa_acomp_fixed_deflate); 1731 1732 return 0; 1733 } 1734 1735 static int iaa_crypto_probe(struct idxd_dev *idxd_dev) 1736 { 1737 struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); 1738 struct idxd_device *idxd = wq->idxd; 1739 struct idxd_driver_data *data = idxd->data; 1740 struct device *dev = &idxd_dev->conf_dev; 1741 bool first_wq = false; 1742 int ret = 0; 1743 1744 if (idxd->state != IDXD_DEV_ENABLED) 1745 return -ENXIO; 1746 1747 if (data->type != IDXD_TYPE_IAX) 1748 return -ENODEV; 1749 1750 mutex_lock(&wq->wq_lock); 1751 1752 if (idxd_wq_get_private(wq)) { 1753 mutex_unlock(&wq->wq_lock); 1754 return -EBUSY; 1755 } 1756 1757 if (!idxd_wq_driver_name_match(wq, dev)) { 1758 dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n", 1759 idxd->id, wq->id, wq->driver_name, dev->driver->name); 1760 idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME; 1761 ret = -ENODEV; 1762 goto err; 1763 } 1764 1765 wq->type = IDXD_WQT_KERNEL; 1766 1767 ret = idxd_drv_enable_wq(wq); 1768 if (ret < 0) { 1769 dev_dbg(dev, "enable wq %d.%d failed: %d\n", 1770 idxd->id, wq->id, ret); 1771 ret = -ENXIO; 1772 goto err; 1773 } 1774 1775 mutex_lock(&iaa_devices_lock); 1776 1777 if (list_empty(&iaa_devices)) { 1778 ret = alloc_wq_table(wq->idxd->max_wqs); 1779 if (ret) 1780 goto err_alloc; 1781 first_wq = true; 1782 } 1783 1784 ret = save_iaa_wq(wq); 1785 if (ret) 1786 goto err_save; 1787 1788 rebalance_wq_table(); 1789 1790 if (first_wq) { 1791 iaa_crypto_enabled = true; 1792 ret = iaa_register_compression_device(); 1793 if (ret != 0) { 1794 iaa_crypto_enabled = false; 1795 dev_dbg(dev, "IAA compression device registration failed\n"); 1796 goto err_register; 1797 } 1798 try_module_get(THIS_MODULE); 1799 1800 pr_info("iaa_crypto now ENABLED\n"); 1801 } 1802 1803 mutex_unlock(&iaa_devices_lock); 1804 out: 1805 mutex_unlock(&wq->wq_lock); 1806 1807 return ret; 1808 1809 err_register: 1810 remove_iaa_wq(wq); 1811 free_iaa_wq(idxd_wq_get_private(wq)); 1812 err_save: 1813 if (first_wq) 1814 free_wq_table(); 1815 err_alloc: 1816 mutex_unlock(&iaa_devices_lock); 1817 idxd_drv_disable_wq(wq); 1818 err: 1819 wq->type = IDXD_WQT_NONE; 1820 1821 goto out; 1822 } 1823 1824 static void iaa_crypto_remove(struct idxd_dev *idxd_dev) 1825 { 1826 struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); 1827 struct idxd_device *idxd = wq->idxd; 1828 struct iaa_wq *iaa_wq; 1829 bool free = false; 1830 1831 idxd_wq_quiesce(wq); 1832 1833 mutex_lock(&wq->wq_lock); 1834 mutex_lock(&iaa_devices_lock); 1835 1836 remove_iaa_wq(wq); 1837 1838 spin_lock(&idxd->dev_lock); 1839 iaa_wq = 
idxd_wq_get_private(wq); 1840 if (!iaa_wq) { 1841 spin_unlock(&idxd->dev_lock); 1842 pr_err("%s: no iaa_wq available to remove\n", __func__); 1843 goto out; 1844 } 1845 1846 if (iaa_wq->ref) { 1847 iaa_wq->remove = true; 1848 } else { 1849 wq = iaa_wq->wq; 1850 idxd_wq_set_private(wq, NULL); 1851 free = true; 1852 } 1853 spin_unlock(&idxd->dev_lock); 1854 if (free) { 1855 __free_iaa_wq(iaa_wq); 1856 kfree(iaa_wq); 1857 } 1858 1859 idxd_drv_disable_wq(wq); 1860 rebalance_wq_table(); 1861 1862 if (nr_iaa == 0) { 1863 iaa_crypto_enabled = false; 1864 free_wq_table(); 1865 module_put(THIS_MODULE); 1866 1867 pr_info("iaa_crypto now DISABLED\n"); 1868 } 1869 out: 1870 mutex_unlock(&iaa_devices_lock); 1871 mutex_unlock(&wq->wq_lock); 1872 } 1873 1874 static enum idxd_dev_type dev_types[] = { 1875 IDXD_DEV_WQ, 1876 IDXD_DEV_NONE, 1877 }; 1878 1879 static struct idxd_device_driver iaa_crypto_driver = { 1880 .probe = iaa_crypto_probe, 1881 .remove = iaa_crypto_remove, 1882 .name = IDXD_SUBDRIVER_NAME, 1883 .type = dev_types, 1884 .desc_complete = iaa_desc_complete, 1885 }; 1886 1887 static int __init iaa_crypto_init_module(void) 1888 { 1889 int ret = 0; 1890 int node; 1891 1892 nr_cpus = num_possible_cpus(); 1893 for_each_node_with_cpus(node) 1894 nr_nodes++; 1895 if (!nr_nodes) { 1896 pr_err("IAA couldn't find any nodes with cpus\n"); 1897 return -ENODEV; 1898 } 1899 nr_cpus_per_node = nr_cpus / nr_nodes; 1900 1901 if (crypto_has_comp("deflate-generic", 0, 0)) 1902 deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0); 1903 1904 if (IS_ERR_OR_NULL(deflate_generic_tfm)) { 1905 pr_err("IAA could not alloc %s tfm: errcode = %ld\n", 1906 "deflate-generic", PTR_ERR(deflate_generic_tfm)); 1907 return -ENOMEM; 1908 } 1909 1910 ret = iaa_aecs_init_fixed(); 1911 if (ret < 0) { 1912 pr_debug("IAA fixed compression mode init failed\n"); 1913 goto err_aecs_init; 1914 } 1915 1916 ret = idxd_driver_register(&iaa_crypto_driver); 1917 if (ret) { 1918 pr_debug("IAA wq sub-driver registration failed\n"); 1919 goto err_driver_reg; 1920 } 1921 1922 ret = driver_create_file(&iaa_crypto_driver.drv, 1923 &driver_attr_verify_compress); 1924 if (ret) { 1925 pr_debug("IAA verify_compress attr creation failed\n"); 1926 goto err_verify_attr_create; 1927 } 1928 1929 ret = driver_create_file(&iaa_crypto_driver.drv, 1930 &driver_attr_sync_mode); 1931 if (ret) { 1932 pr_debug("IAA sync mode attr creation failed\n"); 1933 goto err_sync_attr_create; 1934 } 1935 1936 if (iaa_crypto_debugfs_init()) 1937 pr_warn("debugfs init failed, stats not available\n"); 1938 1939 pr_debug("initialized\n"); 1940 out: 1941 return ret; 1942 1943 err_sync_attr_create: 1944 driver_remove_file(&iaa_crypto_driver.drv, 1945 &driver_attr_verify_compress); 1946 err_verify_attr_create: 1947 idxd_driver_unregister(&iaa_crypto_driver); 1948 err_driver_reg: 1949 iaa_aecs_cleanup_fixed(); 1950 err_aecs_init: 1951 crypto_free_comp(deflate_generic_tfm); 1952 1953 goto out; 1954 } 1955 1956 static void __exit iaa_crypto_cleanup_module(void) 1957 { 1958 if (iaa_unregister_compression_device()) 1959 pr_debug("IAA compression device unregister failed\n"); 1960 1961 iaa_crypto_debugfs_cleanup(); 1962 driver_remove_file(&iaa_crypto_driver.drv, 1963 &driver_attr_sync_mode); 1964 driver_remove_file(&iaa_crypto_driver.drv, 1965 &driver_attr_verify_compress); 1966 idxd_driver_unregister(&iaa_crypto_driver); 1967 iaa_aecs_cleanup_fixed(); 1968 crypto_free_comp(deflate_generic_tfm); 1969 1970 pr_debug("cleaned up\n"); 1971 } 1972 1973 MODULE_IMPORT_NS("IDXD"); 
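/*
 * The helpers provided by the core idxd driver (descriptor allocation
 * and submission, wq enable/disable, etc.) are exported in the "IDXD"
 * symbol namespace; the import above makes them available here.
 */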
MODULE_LICENSE("GPL");
MODULE_ALIAS_IDXD_DEVICE(0);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");

module_init(iaa_crypto_init_module);
module_exit(iaa_crypto_cleanup_module);
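
/*
 * Usage sketch (not part of this driver): once a wq is bound and the
 * driver is ENABLED, kernel consumers reach it through the standard
 * acomp API by driver name.  src_buf/dst_buf and their lengths below
 * are assumed caller-provided buffers; error handling is trimmed to
 * the essentials.
 *
 *	struct scatterlist sg_src, sg_dst;
 *	struct crypto_acomp *tfm;
 *	struct acomp_req *req;
 *	DECLARE_CRYPTO_WAIT(wait);
 *	int ret;
 *
 *	tfm = crypto_alloc_acomp("deflate-iaa", 0, 0);
 *	if (IS_ERR(tfm))
 *		return PTR_ERR(tfm);
 *
 *	req = acomp_request_alloc(tfm);
 *	sg_init_one(&sg_src, src_buf, src_len);
 *	sg_init_one(&sg_dst, dst_buf, dst_len);
 *	acomp_request_set_params(req, &sg_src, &sg_dst, src_len, dst_len);
 *	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 *				   crypto_req_done, &wait);
 *
 *	ret = crypto_wait_req(crypto_acomp_compress(req), &wait);
 *
 *	acomp_request_free(req);
 *	crypto_free_acomp(tfm);
 */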