1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ 3 #include <linux/init.h> 4 #include <linux/kernel.h> 5 #include <linux/module.h> 6 #include <linux/pci.h> 7 #include <linux/io-64-nonatomic-lo-hi.h> 8 #include <linux/dmaengine.h> 9 #include <linux/delay.h> 10 #include <linux/iommu.h> 11 #include <linux/sched/mm.h> 12 #include <uapi/linux/idxd.h> 13 #include "../dmaengine.h" 14 #include "idxd.h" 15 #include "registers.h" 16 17 enum irq_work_type { 18 IRQ_WORK_NORMAL = 0, 19 IRQ_WORK_PROCESS_FAULT, 20 }; 21 22 struct idxd_resubmit { 23 struct work_struct work; 24 struct idxd_desc *desc; 25 }; 26 27 struct idxd_int_handle_revoke { 28 struct work_struct work; 29 struct idxd_device *idxd; 30 }; 31 32 static void idxd_device_reinit(struct work_struct *work) 33 { 34 struct idxd_device *idxd = container_of(work, struct idxd_device, work); 35 struct device *dev = &idxd->pdev->dev; 36 int rc, i; 37 38 idxd_device_reset(idxd); 39 rc = idxd_device_config(idxd); 40 if (rc < 0) 41 goto out; 42 43 rc = idxd_device_enable(idxd); 44 if (rc < 0) 45 goto out; 46 47 for (i = 0; i < idxd->max_wqs; i++) { 48 if (test_bit(i, idxd->wq_enable_map)) { 49 struct idxd_wq *wq = idxd->wqs[i]; 50 51 rc = idxd_wq_enable(wq); 52 if (rc < 0) { 53 clear_bit(i, idxd->wq_enable_map); 54 dev_warn(dev, "Unable to re-enable wq %s\n", 55 dev_name(wq_confdev(wq))); 56 } 57 } 58 } 59 60 return; 61 62 out: 63 idxd_device_clear_state(idxd); 64 } 65 66 /* 67 * The function sends a drain descriptor for the interrupt handle. The drain ensures 68 * all descriptors with this interrupt handle is flushed and the interrupt 69 * will allow the cleanup of the outstanding descriptors. 70 */ 71 static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) 72 { 73 struct idxd_wq *wq = ie_to_wq(ie); 74 struct idxd_device *idxd = wq->idxd; 75 struct device *dev = &idxd->pdev->dev; 76 struct dsa_hw_desc desc = {}; 77 void __iomem *portal; 78 int rc; 79 80 /* Issue a simple drain operation with interrupt but no completion record */ 81 desc.flags = IDXD_OP_FLAG_RCI; 82 desc.opcode = DSA_OPCODE_DRAIN; 83 desc.priv = 1; 84 85 if (ie->pasid != IOMMU_PASID_INVALID) 86 desc.pasid = ie->pasid; 87 desc.int_handle = ie->int_handle; 88 portal = idxd_wq_portal_addr(wq); 89 90 /* 91 * The wmb() makes sure that the descriptor is all there before we 92 * issue. 93 */ 94 wmb(); 95 if (wq_dedicated(wq)) { 96 iosubmit_cmds512(portal, &desc, 1); 97 } else { 98 rc = idxd_enqcmds(wq, portal, &desc); 99 /* This should not fail unless hardware failed. */ 100 if (rc < 0) 101 dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); 102 } 103 } 104 105 static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie) 106 { 107 LIST_HEAD(flist); 108 struct idxd_desc *d, *t; 109 struct llist_node *head; 110 111 spin_lock(&ie->list_lock); 112 head = llist_del_all(&ie->pending_llist); 113 if (head) { 114 llist_for_each_entry_safe(d, t, head, llnode) 115 list_add_tail(&d->list, &ie->work_list); 116 } 117 118 list_for_each_entry_safe(d, t, &ie->work_list, list) { 119 if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL) 120 list_move_tail(&d->list, &flist); 121 } 122 spin_unlock(&ie->list_lock); 123 124 list_for_each_entry_safe(d, t, &flist, list) { 125 list_del(&d->list); 126 idxd_desc_complete(d, IDXD_COMPLETE_ABORT, true); 127 } 128 } 129 130 static void idxd_int_handle_revoke(struct work_struct *work) 131 { 132 struct idxd_int_handle_revoke *revoke = 133 container_of(work, struct idxd_int_handle_revoke, work); 134 struct idxd_device *idxd = revoke->idxd; 135 struct pci_dev *pdev = idxd->pdev; 136 struct device *dev = &pdev->dev; 137 int i, new_handle, rc; 138 139 if (!idxd->request_int_handles) { 140 kfree(revoke); 141 dev_warn(dev, "Unexpected int handle refresh interrupt.\n"); 142 return; 143 } 144 145 /* 146 * The loop attempts to acquire new interrupt handle for all interrupt 147 * vectors that supports a handle. If a new interrupt handle is acquired and the 148 * wq is kernel type, the driver will kill the percpu_ref to pause all 149 * ongoing descriptor submissions. The interrupt handle is then changed. 150 * After change, the percpu_ref is revived and all the pending submissions 151 * are woken to try again. A drain is sent to for the interrupt handle 152 * at the end to make sure all invalid int handle descriptors are processed. 153 */ 154 for (i = 1; i < idxd->irq_cnt; i++) { 155 struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); 156 struct idxd_wq *wq = ie_to_wq(ie); 157 158 if (ie->int_handle == INVALID_INT_HANDLE) 159 continue; 160 161 rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); 162 if (rc < 0) { 163 dev_warn(dev, "get int handle %d failed: %d\n", i, rc); 164 /* 165 * Failed to acquire new interrupt handle. Kill the WQ 166 * and release all the pending submitters. The submitters will 167 * get error return code and handle appropriately. 168 */ 169 ie->int_handle = INVALID_INT_HANDLE; 170 idxd_wq_quiesce(wq); 171 idxd_abort_invalid_int_handle_descs(ie); 172 continue; 173 } 174 175 /* No change in interrupt handle, nothing needs to be done */ 176 if (ie->int_handle == new_handle) 177 continue; 178 179 if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) { 180 /* 181 * All the MSIX interrupts are allocated at once during probe. 182 * Therefore we need to update all interrupts even if the WQ 183 * isn't supporting interrupt operations. 184 */ 185 ie->int_handle = new_handle; 186 continue; 187 } 188 189 mutex_lock(&wq->wq_lock); 190 reinit_completion(&wq->wq_resurrect); 191 192 /* Kill percpu_ref to pause additional descriptor submissions */ 193 percpu_ref_kill(&wq->wq_active); 194 195 /* Wait for all submitters quiesce before we change interrupt handle */ 196 wait_for_completion(&wq->wq_dead); 197 198 ie->int_handle = new_handle; 199 200 /* Revive percpu ref and wake up all the waiting submitters */ 201 percpu_ref_reinit(&wq->wq_active); 202 complete_all(&wq->wq_resurrect); 203 mutex_unlock(&wq->wq_lock); 204 205 /* 206 * The delay here is to wait for all possible MOVDIR64B that 207 * are issued before percpu_ref_kill() has happened to have 208 * reached the PCIe domain before the drain is issued. The driver 209 * needs to ensure that the drain descriptor issued does not pass 210 * all the other issued descriptors that contain the invalid 211 * interrupt handle in order to ensure that the drain descriptor 212 * interrupt will allow the cleanup of all the descriptors with 213 * invalid interrupt handle. 214 */ 215 if (wq_dedicated(wq)) 216 udelay(100); 217 idxd_int_handle_revoke_drain(ie); 218 } 219 kfree(revoke); 220 } 221 222 static void idxd_evl_fault_work(struct work_struct *work) 223 { 224 struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work); 225 struct idxd_wq *wq = fault->wq; 226 struct idxd_device *idxd = wq->idxd; 227 struct device *dev = &idxd->pdev->dev; 228 struct idxd_evl *evl = idxd->evl; 229 struct __evl_entry *entry_head = fault->entry; 230 void *cr = (void *)entry_head + idxd->data->evl_cr_off; 231 int cr_size = idxd->data->compl_size; 232 u8 *status = (u8 *)cr + idxd->data->cr_status_off; 233 u8 *result = (u8 *)cr + idxd->data->cr_result_off; 234 int copied, copy_size; 235 bool *bf; 236 237 switch (fault->status) { 238 case DSA_COMP_CRA_XLAT: 239 if (entry_head->batch && entry_head->first_err_in_batch) 240 evl->batch_fail[entry_head->batch_id] = false; 241 242 copy_size = cr_size; 243 idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); 244 break; 245 case DSA_COMP_BATCH_EVL_ERR: 246 bf = &evl->batch_fail[entry_head->batch_id]; 247 248 copy_size = entry_head->rcr || *bf ? cr_size : 0; 249 if (*bf) { 250 if (*status == DSA_COMP_SUCCESS) 251 *status = DSA_COMP_BATCH_FAIL; 252 *result = 1; 253 *bf = false; 254 } 255 idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); 256 break; 257 case DSA_COMP_DRAIN_EVL: 258 copy_size = cr_size; 259 break; 260 default: 261 copy_size = 0; 262 dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status); 263 break; 264 } 265 266 if (copy_size == 0) 267 return; 268 269 /* 270 * Copy completion record to fault_addr in user address space 271 * that is found by wq and PASID. 272 */ 273 copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr, 274 cr, copy_size); 275 /* 276 * The task that triggered the page fault is unknown currently 277 * because multiple threads may share the user address 278 * space or the task exits already before this fault. 279 * So if the copy fails, SIGSEGV can not be sent to the task. 280 * Just print an error for the failure. The user application 281 * waiting for the completion record will time out on this 282 * failure. 283 */ 284 switch (fault->status) { 285 case DSA_COMP_CRA_XLAT: 286 if (copied != copy_size) { 287 idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); 288 dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n", 289 copy_size, copied); 290 if (entry_head->batch) 291 evl->batch_fail[entry_head->batch_id] = true; 292 } 293 break; 294 case DSA_COMP_BATCH_EVL_ERR: 295 if (copied != copy_size) { 296 idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); 297 dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n", 298 copy_size, copied); 299 } 300 break; 301 case DSA_COMP_DRAIN_EVL: 302 if (copied != copy_size) 303 dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n", 304 copy_size, copied); 305 break; 306 } 307 308 kmem_cache_free(idxd->evl_cache, fault); 309 } 310 311 static void process_evl_entry(struct idxd_device *idxd, 312 struct __evl_entry *entry_head, unsigned int index) 313 { 314 struct device *dev = &idxd->pdev->dev; 315 struct idxd_evl *evl = idxd->evl; 316 u8 status; 317 318 if (test_bit(index, evl->bmap)) { 319 clear_bit(index, evl->bmap); 320 } else { 321 status = DSA_COMP_STATUS(entry_head->error); 322 323 if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL || 324 status == DSA_COMP_BATCH_EVL_ERR) { 325 struct idxd_evl_fault *fault; 326 int ent_size = evl_ent_size(idxd); 327 328 if (entry_head->rci) 329 dev_dbg(dev, "Completion Int Req set, ignoring!\n"); 330 331 if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL) 332 return; 333 334 fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC); 335 if (fault) { 336 struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx]; 337 338 fault->wq = wq; 339 fault->status = status; 340 memcpy(&fault->entry, entry_head, ent_size); 341 INIT_WORK(&fault->work, idxd_evl_fault_work); 342 queue_work(wq->wq, &fault->work); 343 } else { 344 dev_warn(dev, "Failed to service fault work.\n"); 345 } 346 } else { 347 dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", 348 status, entry_head->operation, 349 entry_head->fault_addr); 350 } 351 } 352 } 353 354 static void process_evl_entries(struct idxd_device *idxd) 355 { 356 union evl_status_reg evl_status; 357 unsigned int h, t; 358 struct idxd_evl *evl = idxd->evl; 359 struct __evl_entry *entry_head; 360 unsigned int ent_size = evl_ent_size(idxd); 361 u32 size; 362 363 evl_status.bits = 0; 364 evl_status.int_pending = 1; 365 366 mutex_lock(&evl->lock); 367 /* Clear interrupt pending bit */ 368 iowrite32(evl_status.bits_upper32, 369 idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); 370 evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); 371 t = evl_status.tail; 372 h = evl_status.head; 373 size = idxd->evl->size; 374 375 while (h != t) { 376 entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); 377 process_evl_entry(idxd, entry_head, h); 378 h = (h + 1) % size; 379 } 380 381 evl_status.head = h; 382 iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); 383 mutex_unlock(&evl->lock); 384 } 385 386 static void idxd_device_flr(struct work_struct *work) 387 { 388 struct idxd_device *idxd = container_of(work, struct idxd_device, work); 389 int rc; 390 391 /* 392 * IDXD device requires a Function Level Reset (FLR). 393 * pci_reset_function() will reset the device with FLR. 394 */ 395 rc = pci_reset_function(idxd->pdev); 396 if (rc) 397 dev_err(&idxd->pdev->dev, "FLR failed\n"); 398 } 399 400 static void idxd_wqs_flush_descs(struct idxd_device *idxd) 401 { 402 int i; 403 404 for (i = 0; i < idxd->max_wqs; i++) { 405 struct idxd_wq *wq = idxd->wqs[i]; 406 407 idxd_wq_flush_descs(wq); 408 } 409 } 410 411 static irqreturn_t idxd_halt(struct idxd_device *idxd) 412 { 413 union gensts_reg gensts; 414 415 gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); 416 if (gensts.state == IDXD_DEVICE_STATE_HALT) { 417 idxd->state = IDXD_DEV_HALTED; 418 if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) { 419 /* 420 * If we need a software reset, we will throw the work 421 * on a system workqueue in order to allow interrupts 422 * for the device command completions. 423 */ 424 INIT_WORK(&idxd->work, idxd_device_reinit); 425 queue_work(idxd->wq, &idxd->work); 426 } else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) { 427 idxd->state = IDXD_DEV_HALTED; 428 idxd_mask_error_interrupts(idxd); 429 /* Flush all pending descriptors, and disable 430 * interrupts, they will be re-enabled when FLR 431 * concludes. 432 */ 433 idxd_wqs_flush_descs(idxd); 434 dev_dbg(&idxd->pdev->dev, 435 "idxd halted, doing FLR. After FLR, configs are restored\n"); 436 INIT_WORK(&idxd->work, idxd_device_flr); 437 queue_work(idxd->wq, &idxd->work); 438 439 } else { 440 idxd->state = IDXD_DEV_HALTED; 441 idxd_wqs_quiesce(idxd); 442 idxd_wqs_unmap_portal(idxd); 443 idxd_device_clear_state(idxd); 444 dev_err(&idxd->pdev->dev, 445 "idxd halted, need system reset"); 446 447 return -ENXIO; 448 } 449 } 450 451 return IRQ_HANDLED; 452 } 453 454 irqreturn_t idxd_misc_thread(int vec, void *data) 455 { 456 struct idxd_irq_entry *irq_entry = data; 457 struct idxd_device *idxd = ie_to_idxd(irq_entry); 458 struct device *dev = &idxd->pdev->dev; 459 u32 val = 0; 460 int i; 461 u32 cause; 462 463 cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); 464 if (!cause) 465 return IRQ_NONE; 466 467 iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); 468 469 if (cause & IDXD_INTC_HALT_STATE) 470 return idxd_halt(idxd); 471 472 if (cause & IDXD_INTC_ERR) { 473 spin_lock(&idxd->dev_lock); 474 for (i = 0; i < 4; i++) 475 idxd->sw_err.bits[i] = ioread64(idxd->reg_base + 476 IDXD_SWERR_OFFSET + i * sizeof(u64)); 477 478 iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK, 479 idxd->reg_base + IDXD_SWERR_OFFSET); 480 481 if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) { 482 int id = idxd->sw_err.wq_idx; 483 struct idxd_wq *wq = idxd->wqs[id]; 484 485 if (wq->type == IDXD_WQT_USER) 486 wake_up_interruptible(&wq->err_queue); 487 } else { 488 int i; 489 490 for (i = 0; i < idxd->max_wqs; i++) { 491 struct idxd_wq *wq = idxd->wqs[i]; 492 493 if (wq->type == IDXD_WQT_USER) 494 wake_up_interruptible(&wq->err_queue); 495 } 496 } 497 498 spin_unlock(&idxd->dev_lock); 499 val |= IDXD_INTC_ERR; 500 501 for (i = 0; i < 4; i++) 502 dev_warn_ratelimited(dev, "err[%d]: %#16.16llx\n", 503 i, idxd->sw_err.bits[i]); 504 } 505 506 if (cause & IDXD_INTC_INT_HANDLE_REVOKED) { 507 struct idxd_int_handle_revoke *revoke; 508 509 val |= IDXD_INTC_INT_HANDLE_REVOKED; 510 511 revoke = kzalloc_obj(*revoke, GFP_ATOMIC); 512 if (revoke) { 513 revoke->idxd = idxd; 514 INIT_WORK(&revoke->work, idxd_int_handle_revoke); 515 queue_work(idxd->wq, &revoke->work); 516 517 } else { 518 dev_err(dev, "Failed to allocate work for int handle revoke\n"); 519 idxd_wqs_quiesce(idxd); 520 } 521 } 522 523 if (cause & IDXD_INTC_CMD) { 524 val |= IDXD_INTC_CMD; 525 complete(idxd->cmd_done); 526 } 527 528 if (cause & IDXD_INTC_OCCUPY) { 529 /* Driver does not utilize occupancy interrupt */ 530 val |= IDXD_INTC_OCCUPY; 531 } 532 533 if (cause & IDXD_INTC_PERFMON_OVFL) { 534 val |= IDXD_INTC_PERFMON_OVFL; 535 perfmon_counter_overflow(idxd); 536 } 537 538 if (cause & IDXD_INTC_EVL) { 539 val |= IDXD_INTC_EVL; 540 process_evl_entries(idxd); 541 } 542 543 val ^= cause; 544 if (val) 545 dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n", 546 val); 547 548 return IRQ_HANDLED; 549 } 550 551 static void idxd_int_handle_resubmit_work(struct work_struct *work) 552 { 553 struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work); 554 struct idxd_desc *desc = irw->desc; 555 struct idxd_wq *wq = desc->wq; 556 int rc; 557 558 desc->completion->status = 0; 559 rc = idxd_submit_desc(wq, desc); 560 if (rc < 0) { 561 dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n", 562 desc->id, wq->id); 563 /* 564 * If the error is not -EAGAIN, it means the submission failed due to wq 565 * has been killed instead of ENQCMDS failure. Here the driver needs to 566 * notify the submitter of the failure by reporting abort status. 567 * 568 * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the 569 * abort. 570 */ 571 if (rc != -EAGAIN) { 572 desc->completion->status = IDXD_COMP_DESC_ABORT; 573 idxd_desc_complete(desc, IDXD_COMPLETE_ABORT, false); 574 } 575 idxd_free_desc(wq, desc); 576 } 577 kfree(irw); 578 } 579 580 bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc) 581 { 582 struct idxd_wq *wq = desc->wq; 583 struct idxd_device *idxd = wq->idxd; 584 struct idxd_resubmit *irw; 585 586 irw = kzalloc_obj(*irw); 587 if (!irw) 588 return false; 589 590 irw->desc = desc; 591 INIT_WORK(&irw->work, idxd_int_handle_resubmit_work); 592 queue_work(idxd->wq, &irw->work); 593 return true; 594 } 595 596 static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) 597 { 598 struct idxd_desc *desc, *t; 599 struct llist_node *head; 600 601 head = llist_del_all(&irq_entry->pending_llist); 602 if (!head) 603 return; 604 605 llist_for_each_entry_safe(desc, t, head, llnode) { 606 u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; 607 608 if (status) { 609 /* 610 * Check against the original status as ABORT is software defined 611 * and 0xff, which DSA_COMP_STATUS_MASK can mask out. 612 */ 613 if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { 614 idxd_desc_complete(desc, IDXD_COMPLETE_ABORT, true); 615 continue; 616 } 617 618 idxd_desc_complete(desc, IDXD_COMPLETE_NORMAL, true); 619 } else { 620 spin_lock(&irq_entry->list_lock); 621 list_add_tail(&desc->list, 622 &irq_entry->work_list); 623 spin_unlock(&irq_entry->list_lock); 624 } 625 } 626 } 627 628 static void irq_process_work_list(struct idxd_irq_entry *irq_entry) 629 { 630 LIST_HEAD(flist); 631 struct idxd_desc *desc, *n; 632 633 /* 634 * This lock protects list corruption from access of list outside of the irq handler 635 * thread. 636 */ 637 spin_lock(&irq_entry->list_lock); 638 if (list_empty(&irq_entry->work_list)) { 639 spin_unlock(&irq_entry->list_lock); 640 return; 641 } 642 643 list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) { 644 if (desc->completion->status) { 645 list_move_tail(&desc->list, &flist); 646 } 647 } 648 649 spin_unlock(&irq_entry->list_lock); 650 651 list_for_each_entry_safe(desc, n, &flist, list) { 652 /* 653 * Check against the original status as ABORT is software defined 654 * and 0xff, which DSA_COMP_STATUS_MASK can mask out. 655 */ 656 list_del(&desc->list); 657 658 if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { 659 idxd_desc_complete(desc, IDXD_COMPLETE_ABORT, true); 660 continue; 661 } 662 663 idxd_desc_complete(desc, IDXD_COMPLETE_NORMAL, true); 664 } 665 } 666 667 irqreturn_t idxd_wq_thread(int irq, void *data) 668 { 669 struct idxd_irq_entry *irq_entry = data; 670 671 /* 672 * There are two lists we are processing. The pending_llist is where 673 * submmiter adds all the submitted descriptor after sending it to 674 * the workqueue. It's a lockless singly linked list. The work_list 675 * is the common linux double linked list. We are in a scenario of 676 * multiple producers and a single consumer. The producers are all 677 * the kernel submitters of descriptors, and the consumer is the 678 * kernel irq handler thread for the msix vector when using threaded 679 * irq. To work with the restrictions of llist to remain lockless, 680 * we are doing the following steps: 681 * 1. Iterate through the work_list and process any completed 682 * descriptor. Delete the completed entries during iteration. 683 * 2. llist_del_all() from the pending list. 684 * 3. Iterate through the llist that was deleted from the pending list 685 * and process the completed entries. 686 * 4. If the entry is still waiting on hardware, list_add_tail() to 687 * the work_list. 688 */ 689 irq_process_work_list(irq_entry); 690 irq_process_pending_llist(irq_entry); 691 692 return IRQ_HANDLED; 693 } 694