// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sysfs.h>
#include <linux/device.h>
#include <linux/iommu.h>
#include <uapi/linux/idxd.h>
#include <linux/highmem.h>
#include <linux/sched/smt.h>
#include <crypto/internal/acompress.h>

#include "idxd.h"
#include "iaa_crypto.h"
#include "iaa_crypto_stats.h"

#ifdef pr_fmt
#undef pr_fmt
#endif

#define pr_fmt(fmt)	"idxd: " IDXD_SUBDRIVER_NAME ": " fmt

#define IAA_ALG_PRIORITY	300

/* number of iaa instances probed */
static unsigned int nr_iaa;
static unsigned int nr_cpus;
static unsigned int nr_nodes;
static unsigned int nr_cpus_per_node;

/* Number of physical cpus sharing each iaa instance */
static unsigned int cpus_per_iaa;

/* Per-cpu lookup table for balanced wqs */
static struct wq_table_entry __percpu *wq_table;

static struct idxd_wq *wq_table_next_wq(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	if (++entry->cur_wq >= entry->n_wqs)
		entry->cur_wq = 0;

	if (!entry->wqs[entry->cur_wq])
		return NULL;

	pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
		 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
		 entry->wqs[entry->cur_wq]->id, cpu);

	return entry->wqs[entry->cur_wq];
}

static void wq_table_add(int cpu, struct idxd_wq *wq)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	if (WARN_ON(entry->n_wqs == entry->max_wqs))
		return;

	entry->wqs[entry->n_wqs++] = wq;

	pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
		 entry->wqs[entry->n_wqs - 1]->idxd->id,
		 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
}

static void wq_table_free_entry(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	kfree(entry->wqs);
	memset(entry, 0, sizeof(*entry));
}

static void wq_table_clear_entry(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	entry->n_wqs = 0;
	entry->cur_wq = 0;
	memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
}

LIST_HEAD(iaa_devices);
DEFINE_MUTEX(iaa_devices_lock);

/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
static bool iaa_crypto_enabled;
static bool iaa_crypto_registered;

/* Verify results of IAA compress or not */
static bool iaa_verify_compress = true;

static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
{
	return sysfs_emit(buf, "%d\n", iaa_verify_compress);
}

static ssize_t verify_compress_store(struct device_driver *driver,
				     const char *buf, size_t count)
{
	int ret = -EBUSY;

	mutex_lock(&iaa_devices_lock);

	if (iaa_crypto_enabled)
		goto out;

	ret = kstrtobool(buf, &iaa_verify_compress);
	if (ret)
		goto out;

	ret = count;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
}
static DRIVER_ATTR_RW(verify_compress);

/*
 * The iaa crypto driver supports three 'sync' methods determining how
 * compressions and decompressions are performed:
 *
 * - sync:      the compression or decompression completes before
 *              returning.  This is the mode used by the async crypto
 *              interface when the sync mode is set to 'sync' and by
 *              the sync crypto interface regardless of setting.
 *
 * - async:     the compression or decompression is submitted and returns
 *              immediately.  Completion interrupts are not used so
 *              the caller is responsible for polling the descriptor
 *              for completion.  This mode is applicable to only the
 *              async crypto interface and is ignored for anything
 *              else.
 *
 * - async_irq: the compression or decompression is submitted and
 *              returns immediately.  Completion interrupts are
 *              enabled so the caller can wait for the completion and
 *              yield to other threads.  When the compression or
 *              decompression completes, the completion is signaled
 *              and the caller awakened.  This mode is applicable to
 *              only the async crypto interface and is ignored for
 *              anything else.
 *
 * These modes can be set using the iaa_crypto sync_mode driver
 * attribute.
 */
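
/*
 * Illustrative example (not used by the driver itself): with this
 * sub-driver loaded and no IAA workqueues yet enabled, the driver
 * attributes defined in this file are typically reachable via sysfs,
 * e.g. under /sys/bus/dsa/drivers/crypto/ (the exact path depends on
 * how the idxd bus exposes the sub-driver named IDXD_SUBDRIVER_NAME):
 *
 *	echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 *	echo 0 > /sys/bus/dsa/drivers/crypto/verify_compress
 *
 * Both stores return -EBUSY once iaa_crypto is enabled, so the values
 * must be set before the first workqueue is probed.
 */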

/* Use async mode */
static bool async_mode;
/* Use interrupts */
static bool use_irq;

/**
 * set_iaa_sync_mode - Set IAA sync mode
 * @name: The name of the sync mode
 *
 * Make the IAA sync mode named @name the current sync mode used by
 * compression/decompression.
 */
static int set_iaa_sync_mode(const char *name)
{
	int ret = 0;

	if (sysfs_streq(name, "sync")) {
		async_mode = false;
		use_irq = false;
	} else if (sysfs_streq(name, "async")) {
		async_mode = false;
		use_irq = false;
	} else if (sysfs_streq(name, "async_irq")) {
		async_mode = true;
		use_irq = true;
	} else {
		ret = -EINVAL;
	}

	return ret;
}

static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
{
	int ret = 0;

	if (!async_mode && !use_irq)
		ret = sysfs_emit(buf, "%s\n", "sync");
	else if (async_mode && !use_irq)
		ret = sysfs_emit(buf, "%s\n", "async");
	else if (async_mode && use_irq)
		ret = sysfs_emit(buf, "%s\n", "async_irq");

	return ret;
}

static ssize_t sync_mode_store(struct device_driver *driver,
			       const char *buf, size_t count)
{
	int ret = -EBUSY;

	mutex_lock(&iaa_devices_lock);

	if (iaa_crypto_enabled)
		goto out;

	ret = set_iaa_sync_mode(buf);
	if (ret == 0)
		ret = count;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
}
static DRIVER_ATTR_RW(sync_mode);

static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];

static int find_empty_iaa_compression_mode(void)
{
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++)
		if (!iaa_compression_modes[i])
			return i;

	return -EINVAL;
}

static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
{
	struct iaa_compression_mode *mode;
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		mode = iaa_compression_modes[i];
		if (!mode)
			continue;

		if (!strcmp(mode->name, name)) {
			*idx = i;
			return iaa_compression_modes[i];
		}
	}

	return NULL;
}

static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
{
	kfree(mode->name);
	kfree(mode->ll_table);
	kfree(mode->d_table);

	kfree(mode);
}

/*
 * IAA Compression modes are defined by an ll_table and a d_table.
 * These tables are typically generated and captured using statistics
 * collected from running actual compress/decompress workloads.
 *
 * A module or other kernel code can add and remove compression modes
 * with a given name using the exported @add_iaa_compression_mode()
 * and @remove_iaa_compression_mode() functions.
 *
 * When a new compression mode is added, the tables are saved in a
 * global compression mode list.  When IAA devices are added, a
 * per-IAA device dma mapping is created for each IAA device, for each
 * compression mode.  These are the tables used to do the actual
 * compression/decompression and are unmapped if/when the devices are
 * removed.  Currently, compression modes must be added before any
 * device is added, and removed after all devices have been removed.
 */

/**
 * remove_iaa_compression_mode - Remove an IAA compression mode
 * @name: The name the compression mode will be known as
 *
 * Remove the IAA compression mode named @name.
 */
void remove_iaa_compression_mode(const char *name)
{
	struct iaa_compression_mode *mode;
	int idx;

	mutex_lock(&iaa_devices_lock);

	if (!list_empty(&iaa_devices))
		goto out;

	mode = find_iaa_compression_mode(name, &idx);
	if (mode) {
		free_iaa_compression_mode(mode);
		iaa_compression_modes[idx] = NULL;
	}
out:
	mutex_unlock(&iaa_devices_lock);
}
EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);

/**
 * add_iaa_compression_mode - Add an IAA compression mode
 * @name: The name the compression mode will be known as
 * @ll_table: The ll table
 * @ll_table_size: The ll table size in bytes
 * @d_table: The d table
 * @d_table_size: The d table size in bytes
 * @init: Optional callback function to init the compression mode data
 * @free: Optional callback function to free the compression mode data
 *
 * Add a new IAA compression mode named @name.
 *
 * Returns 0 if successful, errcode otherwise.
 */
int add_iaa_compression_mode(const char *name,
			     const u32 *ll_table,
			     int ll_table_size,
			     const u32 *d_table,
			     int d_table_size,
			     iaa_dev_comp_init_fn_t init,
			     iaa_dev_comp_free_fn_t free)
{
	struct iaa_compression_mode *mode;
	int idx, ret = -ENOMEM;

	mutex_lock(&iaa_devices_lock);

	if (!list_empty(&iaa_devices)) {
		ret = -EBUSY;
		goto out;
	}

	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
	if (!mode)
		goto out;

	mode->name = kstrdup(name, GFP_KERNEL);
	if (!mode->name)
		goto free;

	if (ll_table) {
		mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL);
		if (!mode->ll_table)
			goto free;
		mode->ll_table_size = ll_table_size;
	}

	if (d_table) {
		mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL);
		if (!mode->d_table)
			goto free;
		mode->d_table_size = d_table_size;
	}

	mode->init = init;
	mode->free = free;

	idx = find_empty_iaa_compression_mode();
	if (idx < 0)
		goto free;

	pr_debug("IAA compression mode %s added at idx %d\n",
		 mode->name, idx);

	iaa_compression_modes[idx] = mode;

	ret = 0;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
free:
	free_iaa_compression_mode(mode);
	goto out;
}
EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
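
/*
 * Illustrative sketch (hypothetical names and tables, not part of this
 * driver): a module providing its own statistically-derived Huffman
 * tables could register them before any IAA device is probed:
 *
 *	static const u32 my_ll_table[] = { ... };
 *	static const u32 my_d_table[] = { ... };
 *
 *	ret = add_iaa_compression_mode("my_mode", my_ll_table,
 *				       sizeof(my_ll_table), my_d_table,
 *				       sizeof(my_d_table), NULL, NULL);
 *
 * and later, after all IAA devices have been removed:
 *
 *	remove_iaa_compression_mode("my_mode");
 *
 * add_iaa_compression_mode() returns -EBUSY if any IAA device has
 * already been added.
 */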

static struct iaa_device_compression_mode *
get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
{
	return iaa_device->compression_modes[idx];
}

static void free_device_compression_mode(struct iaa_device *iaa_device,
					 struct iaa_device_compression_mode *device_mode)
{
	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
	struct device *dev = &iaa_device->idxd->pdev->dev;

	kfree(device_mode->name);

	if (device_mode->aecs_comp_table)
		dma_free_coherent(dev, size, device_mode->aecs_comp_table,
				  device_mode->aecs_comp_table_dma_addr);
	kfree(device_mode);
}

#define IDXD_OP_FLAG_AECS_RW_TGLS	0x400000
#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
#define IAX_AECS_COMPRESS_FLAG	(IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
			   IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
			   IDXD_OP_FLAG_AECS_RW_TGLS)

static int check_completion(struct device *dev,
			    struct iax_completion_record *comp,
			    bool compress,
			    bool only_once);

static int init_device_compression_mode(struct iaa_device *iaa_device,
					struct iaa_compression_mode *mode,
					int idx, struct idxd_wq *wq)
{
	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
	struct device *dev = &iaa_device->idxd->pdev->dev;
	struct iaa_device_compression_mode *device_mode;
	int ret = -ENOMEM;

	device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
	if (!device_mode)
		return -ENOMEM;

	device_mode->name = kstrdup(mode->name, GFP_KERNEL);
	if (!device_mode->name)
		goto free;

	device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
							  &device_mode->aecs_comp_table_dma_addr,
							  GFP_KERNEL);
	if (!device_mode->aecs_comp_table)
		goto free;

	/* Add Huffman table to aecs */
	memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
	memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
	memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);

	if (mode->init) {
		ret = mode->init(device_mode);
		if (ret)
			goto free;
	}

	/* mode index should match iaa_compression_modes idx */
	iaa_device->compression_modes[idx] = device_mode;

	pr_debug("IAA %s compression mode initialized for iaa device %d\n",
		 mode->name, iaa_device->idxd->id);

	ret = 0;
out:
	return ret;
free:
	pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
		 mode->name, iaa_device->idxd->id);

	free_device_compression_mode(iaa_device, device_mode);
	goto out;
}

static int init_device_compression_modes(struct iaa_device *iaa_device,
					 struct idxd_wq *wq)
{
	struct iaa_compression_mode *mode;
	int i, ret = 0;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		mode = iaa_compression_modes[i];
		if (!mode)
			continue;

		ret = init_device_compression_mode(iaa_device, mode, i, wq);
		if (ret)
			break;
	}

	return ret;
}

static void remove_device_compression_modes(struct iaa_device *iaa_device)
{
	struct iaa_device_compression_mode *device_mode;
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		device_mode = iaa_device->compression_modes[i];
		if (!device_mode)
			continue;

		if (iaa_compression_modes[i]->free)
			iaa_compression_modes[i]->free(device_mode);
		free_device_compression_mode(iaa_device, device_mode);
		iaa_device->compression_modes[i] = NULL;
	}
}

static struct iaa_device *iaa_device_alloc(void)
{
	struct iaa_device *iaa_device;

	iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
	if (!iaa_device)
		return NULL;

	INIT_LIST_HEAD(&iaa_device->wqs);

	return iaa_device;
}

static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
{
	struct iaa_wq *iaa_wq;

	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
		if (iaa_wq->wq == wq)
			return true;
	}

	return false;
}

static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
{
	struct iaa_device *iaa_device;

	iaa_device = iaa_device_alloc();
	if (!iaa_device)
		return NULL;

	iaa_device->idxd = idxd;

	list_add_tail(&iaa_device->list, &iaa_devices);

	nr_iaa++;

	return iaa_device;
}

static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
{
	return init_device_compression_modes(iaa_device, iaa_wq->wq);
}

static void del_iaa_device(struct iaa_device *iaa_device)
{
	list_del(&iaa_device->list);

	nr_iaa--;
}

static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
		      struct iaa_wq **new_wq)
{
	struct idxd_device *idxd = iaa_device->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iaa_wq *iaa_wq;

	iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
	if (!iaa_wq)
		return -ENOMEM;

	iaa_wq->wq = wq;
	iaa_wq->iaa_device = iaa_device;
	idxd_wq_set_private(wq, iaa_wq);

	list_add_tail(&iaa_wq->list, &iaa_device->wqs);

	iaa_device->n_wq++;

	if (new_wq)
		*new_wq = iaa_wq;

	dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
		wq->id, iaa_device->idxd->id, iaa_device->n_wq);

	return 0;
}

static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
{
	struct idxd_device *idxd = iaa_device->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iaa_wq *iaa_wq;

	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
		if (iaa_wq->wq == wq) {
			list_del(&iaa_wq->list);
			iaa_device->n_wq--;

			dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
				wq->id, iaa_device->idxd->id,
				iaa_device->n_wq, nr_iaa);

			if (iaa_device->n_wq == 0)
				del_iaa_device(iaa_device);
			break;
		}
	}
}

static void clear_wq_table(void)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		wq_table_clear_entry(cpu);

	pr_debug("cleared wq table\n");
}

static void free_iaa_device(struct iaa_device *iaa_device)
{
	if (!iaa_device)
		return;

	remove_device_compression_modes(iaa_device);
	kfree(iaa_device);
}

static void __free_iaa_wq(struct iaa_wq *iaa_wq)
{
	struct iaa_device *iaa_device;

	if (!iaa_wq)
		return;

	iaa_device = iaa_wq->iaa_device;
	if (iaa_device->n_wq == 0)
		free_iaa_device(iaa_wq->iaa_device);
}

static void free_iaa_wq(struct iaa_wq *iaa_wq)
{
	struct idxd_wq *wq;

	__free_iaa_wq(iaa_wq);

	wq = iaa_wq->wq;

	kfree(iaa_wq);
	idxd_wq_set_private(wq, NULL);
}

static int iaa_wq_get(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	int ret = 0;

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (iaa_wq && !iaa_wq->remove) {
		iaa_wq->ref++;
		idxd_wq_get(wq);
	} else {
		ret = -ENODEV;
	}
	spin_unlock(&idxd->dev_lock);

	return ret;
}

static int iaa_wq_put(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;
	int ret = 0;

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (iaa_wq) {
		iaa_wq->ref--;
		if (iaa_wq->ref == 0 && iaa_wq->remove) {
			idxd_wq_set_private(wq, NULL);
			free = true;
		}
		idxd_wq_put(wq);
	} else {
		ret = -ENODEV;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	return ret;
}

static void free_wq_table(void)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		wq_table_free_entry(cpu);

	free_percpu(wq_table);

	pr_debug("freed wq table\n");
}

static int alloc_wq_table(int max_wqs)
{
	struct wq_table_entry *entry;
	int cpu;

	wq_table = alloc_percpu(struct wq_table_entry);
	if (!wq_table)
		return -ENOMEM;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		entry = per_cpu_ptr(wq_table, cpu);
		entry->wqs = kcalloc(max_wqs, sizeof(*entry->wqs), GFP_KERNEL);
		if (!entry->wqs) {
			free_wq_table();
			return -ENOMEM;
		}

		entry->max_wqs = max_wqs;
	}

	pr_debug("initialized wq table\n");

	return 0;
}

static int save_iaa_wq(struct idxd_wq *wq)
{
	struct iaa_device *iaa_device, *found = NULL;
	struct idxd_device *idxd;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		if (iaa_device->idxd == wq->idxd) {
			idxd = iaa_device->idxd;
			pdev = idxd->pdev;
			dev = &pdev->dev;
			/*
			 * Check to see that we don't already have this wq.
			 * Shouldn't happen but we don't control probing.
			 */
			if (iaa_has_wq(iaa_device, wq)) {
				dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
					iaa_device);
				goto out;
			}

			found = iaa_device;

			ret = add_iaa_wq(iaa_device, wq, NULL);
			if (ret)
				goto out;

			break;
		}
	}

	if (!found) {
		struct iaa_device *new_device;
		struct iaa_wq *new_wq;

		new_device = add_iaa_device(wq->idxd);
		if (!new_device) {
			ret = -ENOMEM;
			goto out;
		}

		ret = add_iaa_wq(new_device, wq, &new_wq);
		if (ret) {
			del_iaa_device(new_device);
			free_iaa_device(new_device);
			goto out;
		}

		ret = init_iaa_device(new_device, new_wq);
		if (ret) {
			del_iaa_wq(new_device, new_wq->wq);
			del_iaa_device(new_device);
			free_iaa_wq(new_wq);
			goto out;
		}
	}

	if (WARN_ON(nr_iaa == 0))
		return -EINVAL;

	cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
	if (!cpus_per_iaa)
		cpus_per_iaa = 1;
out:
	return ret;
}

static void remove_iaa_wq(struct idxd_wq *wq)
{
	struct iaa_device *iaa_device;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		if (iaa_has_wq(iaa_device, wq)) {
			del_iaa_wq(iaa_device, wq);
			break;
		}
	}

	if (nr_iaa) {
		cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
		if (!cpus_per_iaa)
			cpus_per_iaa = 1;
	} else
		cpus_per_iaa = 1;
}

static int wq_table_add_wqs(int iaa, int cpu)
{
	struct iaa_device *iaa_device, *found_device = NULL;
	int ret = 0, cur_iaa = 0, n_wqs_added = 0;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		idxd = iaa_device->idxd;
		pdev = idxd->pdev;
		dev = &pdev->dev;

		if (cur_iaa != iaa) {
			cur_iaa++;
			continue;
		}

		found_device = iaa_device;
		dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
			found_device->idxd->id, cur_iaa);
		break;
	}

	if (!found_device) {
		found_device = list_first_entry_or_null(&iaa_devices,
							struct iaa_device, list);
		if (!found_device) {
			pr_debug("couldn't find any iaa devices with wqs!\n");
			ret = -EINVAL;
			goto out;
		}
		cur_iaa = 0;

		idxd = found_device->idxd;
		pdev = idxd->pdev;
		dev = &pdev->dev;
		dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
			found_device->idxd->id, cur_iaa);
	}

	list_for_each_entry(iaa_wq, &found_device->wqs, list) {
		wq_table_add(cpu, iaa_wq->wq);
		pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
			 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
		n_wqs_added++;
	}

	if (!n_wqs_added) {
		pr_debug("couldn't find any iaa wqs!\n");
		ret = -EINVAL;
		goto out;
	}
out:
	return ret;
}

/*
 * Rebalance the wq table so that given a cpu, it's easy to find the
 * closest IAA instance.  The idea is to try to choose the most
 * appropriate IAA instance for a caller and spread available
 * workqueues around to clients.
 */
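
/*
 * Worked example (illustrative numbers only): on a single-node system
 * with 128 cpus and 4 IAA instances, save_iaa_wq() computes
 * cpus_per_iaa = (1 * 128) / 4 = 32, so the loop below maps cpus 0-31
 * to iaa 0, cpus 32-63 to iaa 1, and so on (iaa = cpu / cpus_per_iaa),
 * with each cpu getting that instance's workqueues in its per-cpu
 * wq_table entry.
 */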
static void rebalance_wq_table(void)
{
	const struct cpumask *node_cpus;
	int node_cpu, node, cpu, iaa = 0;

	if (nr_iaa == 0)
		return;

	pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
		 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);

	clear_wq_table();

	if (nr_iaa == 1) {
		for_each_possible_cpu(cpu) {
			if (WARN_ON(wq_table_add_wqs(0, cpu)))
				goto err;
		}

		return;
	}

	for_each_node_with_cpus(node) {
		cpu = 0;
		node_cpus = cpumask_of_node(node);

		for_each_cpu(node_cpu, node_cpus) {
			iaa = cpu / cpus_per_iaa;
			if (WARN_ON(wq_table_add_wqs(iaa, node_cpu)))
				goto err;
			cpu++;
		}
	}

	return;
err:
	pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
}

static inline int check_completion(struct device *dev,
				   struct iax_completion_record *comp,
				   bool compress,
				   bool only_once)
{
	char *op_str = compress ? "compress" : "decompress";
	int status_checks = 0;
	int ret = 0;

	while (!comp->status) {
		if (only_once)
			return -EAGAIN;
		cpu_relax();
		if (status_checks++ >= IAA_COMPLETION_TIMEOUT) {
			/* Something is wrong with the hw, disable it. */
			dev_err(dev, "%s completion timed out - "
				"assuming broken hw, iaa_crypto now DISABLED\n",
				op_str);
			iaa_crypto_enabled = false;
			ret = -ETIMEDOUT;
			goto out;
		}
	}

	if (comp->status != IAX_COMP_SUCCESS) {
		if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
			ret = -ETIMEDOUT;
			dev_dbg(dev, "%s timed out, size=0x%x\n",
				op_str, comp->output_size);
			update_completion_timeout_errs();
			goto out;
		}

		if (comp->status == IAA_ANALYTICS_ERROR &&
		    comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
			ret = -E2BIG;
			dev_dbg(dev, "compressed > uncompressed size,"
				" not compressing, size=0x%x\n",
				comp->output_size);
			update_completion_comp_buf_overflow_errs();
			goto out;
		}

		if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
			ret = -EOVERFLOW;
			goto out;
		}

		ret = -EINVAL;
		dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
			op_str, comp->status, comp->error_code, comp->output_size);
		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
		update_completion_einval_errs();

		goto out;
	}
out:
	return ret;
}

static int deflate_generic_decompress(struct acomp_req *req)
{
	ACOMP_FBREQ_ON_STACK(fbreq, req);
	int ret;

	ret = crypto_acomp_decompress(fbreq);
	req->dlen = fbreq->dlen;

	update_total_sw_decomp_calls();

	return ret;
}

static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
				struct acomp_req *req,
				dma_addr_t *src_addr, dma_addr_t *dst_addr);

static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
			       struct idxd_wq *wq,
			       dma_addr_t src_addr, unsigned int slen,
			       dma_addr_t dst_addr, unsigned int *dlen);

static void iaa_desc_complete(struct idxd_desc *idxd_desc,
			      enum idxd_complete_type comp_type,
			      bool free_desc, void *__ctx,
			      u32 *status)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *compression_ctx;
	struct crypto_ctx *ctx = __ctx;
	struct iaa_device *iaa_device;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret, err = 0;

	compression_ctx = crypto_tfm_ctx(ctx->tfm);

	iaa_wq = idxd_wq_get_private(idxd_desc->wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device,
								   compression_ctx->mode);
	dev_dbg(dev, "%s: compression mode %s,"
		" ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
		active_compression_mode->name,
		ctx->src_addr, ctx->dst_addr);

	ret = check_completion(dev, idxd_desc->iax_completion,
			       ctx->compress, false);
	if (ret) {
		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
		if (!ctx->compress &&
		    idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
			pr_warn("%s: falling back to deflate-generic decompress, "
				"analytics error code %x\n", __func__,
				idxd_desc->iax_completion->error_code);
			ret = deflate_generic_decompress(ctx->req);
			if (ret) {
				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
					__func__, ret);
				err = -EIO;
				goto err;
			}
		} else {
			err = -EIO;
			goto err;
		}
	} else {
		ctx->req->dlen = idxd_desc->iax_completion->output_size;
	}

	/* Update stats */
	if (ctx->compress) {
		update_total_comp_bytes_out(ctx->req->dlen);
		update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
	} else {
		update_total_decomp_bytes_in(ctx->req->slen);
		update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen);
	}

	if (ctx->compress && compression_ctx->verify_compress) {
		u32 *compression_crc = acomp_request_ctx(ctx->req);
		dma_addr_t src_addr, dst_addr;

		*compression_crc = idxd_desc->iax_completion->crc;

		ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
		if (ret) {
			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
			err = -EIO;
			goto out;
		}

		ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
					  ctx->req->slen, dst_addr, &ctx->req->dlen);
		if (ret) {
			dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
			err = -EIO;
		}

		dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
		dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);

		goto out;
	}
err:
	dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
	dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
out:
	if (ret != 0)
		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);

	if (ctx->req->base.complete)
		acomp_request_complete(ctx->req, err);

	if (free_desc)
		idxd_free_desc(idxd_desc->wq, idxd_desc);
	iaa_wq_put(idxd_desc->wq);
}

static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
			struct idxd_wq *wq,
			dma_addr_t src_addr, unsigned int slen,
			dma_addr_t dst_addr, unsigned int *dlen)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	u32 *compression_crc = acomp_request_ctx(req);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
		IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_COMPRESS;
	desc->compr_flags = IAA_COMP_FLAGS;
	desc->priv = 0;

	desc->src1_addr = (u64)src_addr;
	desc->src1_size = slen;
	desc->dst_addr = (u64)dst_addr;
	desc->max_dst_size = *dlen;
	desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
	desc->src2_size = sizeof(struct aecs_comp_table_record);
	desc->completion_addr = idxd_desc->compl_dma;

	if (ctx->use_irq) {
		desc->flags |= IDXD_OP_FLAG_RCI;

		idxd_desc->crypto.req = req;
		idxd_desc->crypto.tfm = tfm;
		idxd_desc->crypto.src_addr = src_addr;
		idxd_desc->crypto.dst_addr = dst_addr;
		idxd_desc->crypto.compress = true;

		dev_dbg(dev, "%s use_async_irq: compression mode %s,"
			" src_addr %llx, dst_addr %llx\n", __func__,
			active_compression_mode->name,
			src_addr, dst_addr);
	}

	dev_dbg(dev, "%s: compression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
		goto err;
	}

	/* Update stats */
	update_total_comp_calls();
	update_wq_comp_calls(wq);

	if (ctx->async_mode) {
		ret = -EINPROGRESS;
		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
		goto out;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, true, false);
	if (ret) {
		dev_dbg(dev, "check_completion failed ret=%d\n", ret);
		goto err;
	}

	*dlen = idxd_desc->iax_completion->output_size;

	/* Update stats */
	update_total_comp_bytes_out(*dlen);
	update_wq_comp_bytes(wq, *dlen);

	*compression_crc = idxd_desc->iax_completion->crc;

	if (!ctx->async_mode)
		idxd_free_desc(wq, idxd_desc);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);

	goto out;
}

static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
				struct acomp_req *req,
				dma_addr_t *src_addr, dma_addr_t *dst_addr)
{
	int ret = 0;
	int nr_sgs;

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	*src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
		goto out;
	}
	*dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));
out:
	return ret;
}

static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
			       struct idxd_wq *wq,
			       dma_addr_t src_addr, unsigned int slen,
			       dma_addr_t dst_addr, unsigned int *dlen)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	u32 *compression_crc = acomp_request_ctx(req);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa compress failed: ret=%ld\n",
			PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	/* Verify (optional) - decompress and check crc, suppress dest write */

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_DECOMPRESS;
	desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
	desc->priv = 0;

	desc->src1_addr = (u64)dst_addr;
	desc->src1_size = *dlen;
	desc->dst_addr = (u64)src_addr;
	desc->max_dst_size = slen;
	desc->completion_addr = idxd_desc->compl_dma;

	dev_dbg(dev, "(verify) compression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n",
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
		goto err;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
	if (ret) {
		dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
		goto err;
	}

	if (*compression_crc != idxd_desc->iax_completion->crc) {
		ret = -EINVAL;
		dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
			" comp=0x%x, decomp=0x%x\n", *compression_crc,
			idxd_desc->iax_completion->crc);
		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
			       8, 1, idxd_desc->iax_completion, 64, 0);
		goto err;
	}

	idxd_free_desc(wq, idxd_desc);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);

	goto out;
}

static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
			  struct idxd_wq *wq,
			  dma_addr_t src_addr, unsigned int slen,
			  dma_addr_t dst_addr, unsigned int *dlen)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
			PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_DECOMPRESS;
	desc->max_dst_size = PAGE_SIZE;
	desc->decompr_flags = IAA_DECOMP_FLAGS;
	desc->priv = 0;

	desc->src1_addr = (u64)src_addr;
	desc->dst_addr = (u64)dst_addr;
	desc->max_dst_size = *dlen;
	desc->src1_size = slen;
	desc->completion_addr = idxd_desc->compl_dma;

	if (ctx->use_irq) {
		desc->flags |= IDXD_OP_FLAG_RCI;

		idxd_desc->crypto.req = req;
		idxd_desc->crypto.tfm = tfm;
		idxd_desc->crypto.src_addr = src_addr;
		idxd_desc->crypto.dst_addr = dst_addr;
		idxd_desc->crypto.compress = false;

		dev_dbg(dev, "%s: use_async_irq compression mode %s,"
			" src_addr %llx, dst_addr %llx\n", __func__,
			active_compression_mode->name,
			src_addr, dst_addr);
	}

	dev_dbg(dev, "%s: decompression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
		goto err;
	}

	/* Update stats */
	update_total_decomp_calls();
	update_wq_decomp_calls(wq);

	if (ctx->async_mode) {
		ret = -EINPROGRESS;
		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
		goto out;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
	if (ret) {
		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
		if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
			pr_warn("%s: falling back to deflate-generic decompress, "
				"analytics error code %x\n", __func__,
				idxd_desc->iax_completion->error_code);
			ret = deflate_generic_decompress(req);
			if (ret) {
				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
					__func__, ret);
				goto err;
			}
		} else {
			goto err;
		}
	} else {
		req->dlen = idxd_desc->iax_completion->output_size;
	}

	*dlen = req->dlen;

	if (!ctx->async_mode)
		idxd_free_desc(wq, idxd_desc);

	/* Update stats */
	update_total_decomp_bytes_in(slen);
	update_wq_decomp_bytes(wq, slen);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);

	goto out;
}

static int iaa_comp_acompress(struct acomp_req *req)
{
	struct iaa_compression_ctx *compression_ctx;
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	struct idxd_wq *wq;
	struct device *dev;

	compression_ctx = crypto_tfm_ctx(tfm);

	if (!iaa_crypto_enabled) {
		pr_debug("iaa_crypto disabled, not compressing\n");
		return -ENODEV;
	}

	if (!req->src || !req->slen) {
		pr_debug("invalid src, not compressing\n");
		return -EINVAL;
	}

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}
	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));

	ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
			   &req->dlen);
	if (ret == -EINPROGRESS)
		return ret;

	if (!ret && compression_ctx->verify_compress) {
		ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
		if (ret) {
			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
			goto out;
		}

		ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
					  dst_addr, &req->dlen);
		if (ret)
			dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);

		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);

		goto out;
	}

	if (ret)
		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	return ret;
}

static int iaa_comp_adecompress(struct acomp_req *req)
{
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	struct device *dev;
	struct idxd_wq *wq;

	if (!iaa_crypto_enabled) {
		pr_debug("iaa_crypto disabled, not decompressing\n");
		return -ENODEV;
	}

	if (!req->src || !req->slen) {
		pr_debug("invalid src, not decompressing\n");
		return -EINVAL;
	}

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}
	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));

	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
			     dst_addr, &req->dlen);
	if (ret == -EINPROGRESS)
		return ret;

	if (ret != 0)
		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	return ret;
}

static void compression_ctx_init(struct iaa_compression_ctx *ctx)
{
	ctx->verify_compress = iaa_verify_compress;
	ctx->async_mode = async_mode;
	ctx->use_irq = use_irq;
}

static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
{
	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);

	compression_ctx_init(ctx);

	ctx->mode = IAA_MODE_FIXED;

	return 0;
}

static struct acomp_alg iaa_acomp_fixed_deflate = {
	.init			= iaa_comp_init_fixed,
	.compress		= iaa_comp_acompress,
	.decompress		= iaa_comp_adecompress,
	.base			= {
		.cra_name		= "deflate",
		.cra_driver_name	= "deflate-iaa",
		.cra_flags		= CRYPTO_ALG_ASYNC,
		.cra_ctxsize		= sizeof(struct iaa_compression_ctx),
		.cra_reqsize		= sizeof(u32),
		.cra_module		= THIS_MODULE,
		.cra_priority		= IAA_ALG_PRIORITY,
	}
};
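
/*
 * Illustrative sketch (not part of this driver): once the algorithm
 * above is registered, a kernel user would typically reach it through
 * the generic acomp API, where IAA_ALG_PRIORITY makes it the preferred
 * "deflate" provider, e.g. (error and completion handling omitted):
 *
 *	struct crypto_acomp *tfm = crypto_alloc_acomp("deflate", 0, 0);
 *	struct acomp_req *req = acomp_request_alloc(tfm);
 *
 *	acomp_request_set_params(req, src_sgl, dst_sgl, slen, dlen);
 *	err = crypto_acomp_compress(req);
 *
 * Facilities such as zswap select it the same way, by algorithm or
 * driver name ("deflate-iaa").
 */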

static int iaa_register_compression_device(void)
{
	int ret;

	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
	if (ret) {
		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
		goto out;
	}

	iaa_crypto_registered = true;
out:
	return ret;
}

static void iaa_unregister_compression_device(void)
{
	if (iaa_crypto_registered)
		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
}

static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct idxd_driver_data *data = idxd->data;
	struct device *dev = &idxd_dev->conf_dev;
	bool first_wq = false;
	int ret = 0;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -ENXIO;

	if (data->type != IDXD_TYPE_IAX)
		return -ENODEV;

	mutex_lock(&wq->wq_lock);

	if (idxd_wq_get_private(wq)) {
		mutex_unlock(&wq->wq_lock);
		return -EBUSY;
	}

	if (!idxd_wq_driver_name_match(wq, dev)) {
		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
			idxd->id, wq->id, wq->driver_name, dev->driver->name);
		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
		ret = -ENODEV;
		goto err;
	}

	wq->type = IDXD_WQT_KERNEL;

	ret = idxd_drv_enable_wq(wq);
	if (ret < 0) {
		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
			idxd->id, wq->id, ret);
		ret = -ENXIO;
		goto err;
	}

	mutex_lock(&iaa_devices_lock);

	if (list_empty(&iaa_devices)) {
		ret = alloc_wq_table(wq->idxd->max_wqs);
		if (ret)
			goto err_alloc;
		first_wq = true;
	}

	ret = save_iaa_wq(wq);
	if (ret)
		goto err_save;

	rebalance_wq_table();

	if (first_wq) {
		iaa_crypto_enabled = true;
		ret = iaa_register_compression_device();
		if (ret != 0) {
			iaa_crypto_enabled = false;
			dev_dbg(dev, "IAA compression device registration failed\n");
			goto err_register;
		}
		try_module_get(THIS_MODULE);

		pr_info("iaa_crypto now ENABLED\n");
	}

	mutex_unlock(&iaa_devices_lock);
out:
	mutex_unlock(&wq->wq_lock);

	return ret;

err_register:
	remove_iaa_wq(wq);
	free_iaa_wq(idxd_wq_get_private(wq));
err_save:
	if (first_wq)
		free_wq_table();
err_alloc:
	mutex_unlock(&iaa_devices_lock);
	idxd_drv_disable_wq(wq);
err:
	wq->type = IDXD_WQT_NONE;

	goto out;
}

static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;

	idxd_wq_quiesce(wq);

	mutex_lock(&wq->wq_lock);
	mutex_lock(&iaa_devices_lock);

	remove_iaa_wq(wq);

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (!iaa_wq) {
		spin_unlock(&idxd->dev_lock);
		pr_err("%s: no iaa_wq available to remove\n", __func__);
		goto out;
	}

	if (iaa_wq->ref) {
		iaa_wq->remove = true;
	} else {
		wq = iaa_wq->wq;
		idxd_wq_set_private(wq, NULL);
		free = true;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	idxd_drv_disable_wq(wq);
	rebalance_wq_table();

	if (nr_iaa == 0) {
		iaa_crypto_enabled = false;
		free_wq_table();
		module_put(THIS_MODULE);

		pr_info("iaa_crypto now DISABLED\n");
	}
out:
	mutex_unlock(&iaa_devices_lock);
	mutex_unlock(&wq->wq_lock);
}

static enum idxd_dev_type dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};

static struct idxd_device_driver iaa_crypto_driver = {
	.probe = iaa_crypto_probe,
	.remove = iaa_crypto_remove,
	.name = IDXD_SUBDRIVER_NAME,
	.type = dev_types,
	.desc_complete = iaa_desc_complete,
};

static int __init iaa_crypto_init_module(void)
{
	int ret = 0;
	int node;

	nr_cpus = num_possible_cpus();
	for_each_node_with_cpus(node)
		nr_nodes++;
	if (!nr_nodes) {
		pr_err("IAA couldn't find any nodes with cpus\n");
		return -ENODEV;
	}
	nr_cpus_per_node = nr_cpus / nr_nodes;

	ret = iaa_aecs_init_fixed();
	if (ret < 0) {
		pr_debug("IAA fixed compression mode init failed\n");
		goto err_aecs_init;
	}

	ret = idxd_driver_register(&iaa_crypto_driver);
	if (ret) {
		pr_debug("IAA wq sub-driver registration failed\n");
		goto err_driver_reg;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_verify_compress);
	if (ret) {
		pr_debug("IAA verify_compress attr creation failed\n");
		goto err_verify_attr_create;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_sync_mode);
	if (ret) {
		pr_debug("IAA sync mode attr creation failed\n");
		goto err_sync_attr_create;
	}

	if (iaa_crypto_debugfs_init())
		pr_warn("debugfs init failed, stats not available\n");

	pr_debug("initialized\n");
out:
	return ret;

err_sync_attr_create:
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
err_verify_attr_create:
	idxd_driver_unregister(&iaa_crypto_driver);
err_driver_reg:
	iaa_aecs_cleanup_fixed();
err_aecs_init:

	goto out;
}

static void __exit iaa_crypto_cleanup_module(void)
{
	iaa_unregister_compression_device();

	iaa_crypto_debugfs_cleanup();
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_sync_mode);
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
	idxd_driver_unregister(&iaa_crypto_driver);
	iaa_aecs_cleanup_fixed();

	pr_debug("cleaned up\n");
}

MODULE_IMPORT_NS("IDXD");
MODULE_LICENSE("GPL");
MODULE_ALIAS_IDXD_DEVICE(0);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");

module_init(iaa_crypto_init_module);
module_exit(iaa_crypto_cleanup_module);