1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/firmware.h> 20 #include <asm/vphn.h> 21 #include <asm/vas.h> 22 #include "vas.h" 23 24 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 25 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 26 /* The hypervisor allows one credit per window right now */ 27 #define DEF_WIN_CREDS 1 28 29 static struct vas_all_caps caps_all; 30 static bool copypaste_feat; 31 static struct hv_vas_cop_feat_caps hv_cop_caps; 32 33 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 34 static DEFINE_MUTEX(vas_pseries_mutex); 35 static bool migration_in_progress; 36 37 static long hcall_return_busy_check(long rc) 38 { 39 /* Check if we are stalled for some time */ 40 if (H_IS_LONG_BUSY(rc)) { 41 msleep(get_longbusy_msecs(rc)); 42 rc = H_BUSY; 43 } else if (rc == H_BUSY) { 44 cond_resched(); 45 } 46 47 return rc; 48 } 49 50 /* 51 * Allocate VAS window hcall 52 */ 53 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 54 u8 wintype, u16 credits) 55 { 56 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 57 long rc; 58 59 do { 60 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 61 credits, domain[0], domain[1], domain[2], 62 domain[3], domain[4], domain[5]); 63 64 rc = hcall_return_busy_check(rc); 65 } while (rc == H_BUSY); 66 67 if (rc == H_SUCCESS) { 68 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 69 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 70 return -ENOTSUPP; 71 } 72 win->vas_win.winid = retbuf[0]; 73 win->win_addr = retbuf[1]; 74 win->complete_irq = retbuf[2]; 75 win->fault_irq = retbuf[3]; 76 return 0; 77 } 78 79 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 80 rc, wintype, credits); 81 82 return -EIO; 83 } 84 85 /* 86 * Deallocate VAS window hcall. 87 */ 88 static int h_deallocate_vas_window(u64 winid) 89 { 90 long rc; 91 92 do { 93 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 94 95 rc = hcall_return_busy_check(rc); 96 } while (rc == H_BUSY); 97 98 if (rc == H_SUCCESS) 99 return 0; 100 101 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 102 rc, winid); 103 return -EIO; 104 } 105 106 /* 107 * Modify VAS window. 108 * After the window is opened with allocate window hcall, configure it 109 * with flags and LPAR PID before using. 110 */ 111 static int h_modify_vas_window(struct pseries_vas_window *win) 112 { 113 long rc; 114 115 /* 116 * AMR value is not supported in Linux VAS implementation. 117 * The hypervisor ignores it if 0 is passed. 118 */ 119 do { 120 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 121 win->vas_win.winid, win->pid, 0, 122 VAS_MOD_WIN_FLAGS, 0); 123 124 rc = hcall_return_busy_check(rc); 125 } while (rc == H_BUSY); 126 127 if (rc == H_SUCCESS) 128 return 0; 129 130 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", 131 rc, win->vas_win.winid, win->pid); 132 return -EIO; 133 } 134 135 /* 136 * This hcall is used to determine the capabilities from the hypervisor. 137 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 138 * @query_type: If 0 is passed, the hypervisor returns the overall 139 * capabilities which provides all feature(s) that are 140 * available. Then query the hypervisor to get the 141 * corresponding capabilities for the specific feature. 142 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 143 * and VAS GZIP Default capabilities. 144 * H_QUERY_NX_CAPABILITIES provides NX GZIP 145 * capabilities. 146 * @result: Return buffer to save capabilities. 147 */ 148 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 149 { 150 long rc; 151 152 rc = plpar_hcall_norets(hcall, query_type, result); 153 154 if (rc == H_SUCCESS) 155 return 0; 156 157 /* H_FUNCTION means HV does not support VAS so don't print an error */ 158 if (rc != H_FUNCTION) { 159 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 160 (hcall == H_QUERY_VAS_CAPABILITIES) ? 161 "H_QUERY_VAS_CAPABILITIES" : 162 "H_QUERY_NX_CAPABILITIES", 163 rc, query_type, result); 164 } 165 166 return -EIO; 167 } 168 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 169 170 /* 171 * hcall to get fault CRB from the hypervisor. 172 */ 173 static int h_get_nx_fault(u32 winid, u64 buffer) 174 { 175 long rc; 176 177 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 178 179 if (rc == H_SUCCESS) 180 return 0; 181 182 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 183 rc, winid, buffer); 184 return -EIO; 185 186 } 187 188 /* 189 * Handle the fault interrupt. 190 * When the fault interrupt is received for each window, query the 191 * hypervisor to get the fault CRB on the specific fault. Then 192 * process the CRB by updating CSB or send signal if the user space 193 * CSB is invalid. 194 * Note: The hypervisor forwards an interrupt for each fault request. 195 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 196 */ 197 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 198 { 199 struct pseries_vas_window *txwin = data; 200 struct coprocessor_request_block crb; 201 struct vas_user_win_ref *tsk_ref; 202 int rc; 203 204 while (atomic_read(&txwin->pending_faults)) { 205 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 206 if (!rc) { 207 tsk_ref = &txwin->vas_win.task_ref; 208 vas_dump_crb(&crb); 209 vas_update_csb(&crb, tsk_ref); 210 } 211 atomic_dec(&txwin->pending_faults); 212 } 213 214 return IRQ_HANDLED; 215 } 216 217 /* 218 * irq_default_primary_handler() can be used only with IRQF_ONESHOT 219 * which disables IRQ before executing the thread handler and enables 220 * it after. But this disabling interrupt sets the VAS IRQ OFF 221 * state in the hypervisor. If the NX generates fault interrupt 222 * during this window, the hypervisor will not deliver this 223 * interrupt to the LPAR. So use VAS specific IRQ handler instead 224 * of calling the default primary handler. 225 */ 226 static irqreturn_t pseries_vas_irq_handler(int irq, void *data) 227 { 228 struct pseries_vas_window *txwin = data; 229 230 /* 231 * The thread handler will process this interrupt if it is 232 * already running. 233 */ 234 atomic_inc(&txwin->pending_faults); 235 236 return IRQ_WAKE_THREAD; 237 } 238 239 /* 240 * Allocate window and setup IRQ mapping. 241 */ 242 static int allocate_setup_window(struct pseries_vas_window *txwin, 243 u64 *domain, u8 wintype) 244 { 245 int rc; 246 247 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 248 if (rc) 249 return rc; 250 /* 251 * On PowerVM, the hypervisor setup and forwards the fault 252 * interrupt per window. So the IRQ setup and fault handling 253 * will be done for each open window separately. 254 */ 255 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 256 if (!txwin->fault_virq) { 257 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 258 rc = -EINVAL; 259 goto out_win; 260 } 261 262 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 263 txwin->vas_win.winid); 264 if (!txwin->name) { 265 rc = -ENOMEM; 266 goto out_irq; 267 } 268 269 rc = request_threaded_irq(txwin->fault_virq, 270 pseries_vas_irq_handler, 271 pseries_vas_fault_thread_fn, 0, 272 txwin->name, txwin); 273 if (rc) { 274 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 275 txwin->vas_win.winid, txwin->fault_virq, rc); 276 goto out_free; 277 } 278 279 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 280 281 return 0; 282 out_free: 283 kfree(txwin->name); 284 out_irq: 285 irq_dispose_mapping(txwin->fault_virq); 286 out_win: 287 h_deallocate_vas_window(txwin->vas_win.winid); 288 return rc; 289 } 290 291 static inline void free_irq_setup(struct pseries_vas_window *txwin) 292 { 293 free_irq(txwin->fault_virq, txwin); 294 kfree(txwin->name); 295 irq_dispose_mapping(txwin->fault_virq); 296 } 297 298 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 299 enum vas_cop_type cop_type) 300 { 301 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 302 struct vas_cop_feat_caps *cop_feat_caps; 303 struct vas_caps *caps; 304 struct pseries_vas_window *txwin; 305 int rc; 306 307 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 308 if (!txwin) 309 return ERR_PTR(-ENOMEM); 310 311 /* 312 * A VAS window can have many credits which means that many 313 * requests can be issued simultaneously. But the hypervisor 314 * restricts one credit per window. 315 * The hypervisor introduces 2 different types of credits: 316 * Default credit type (Uses normal priority FIFO): 317 * A limited number of credits are assigned to partitions 318 * based on processor entitlement. But these credits may be 319 * over-committed on a system depends on whether the CPUs 320 * are in shared or dedicated modes - that is, more requests 321 * may be issued across the system than NX can service at 322 * once which can result in paste command failure (RMA_busy). 323 * Then the process has to resend requests or fall-back to 324 * SW compression. 325 * Quality of Service (QoS) credit type (Uses high priority FIFO): 326 * To avoid NX HW contention, the system admins can assign 327 * QoS credits for each LPAR so that this partition is 328 * guaranteed access to NX resources. These credits are 329 * assigned to partitions via the HMC. 330 * Refer PAPR for more information. 331 * 332 * Allocate window with QoS credits if user requested. Otherwise 333 * default credits are used. 334 */ 335 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 336 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 337 else 338 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 339 340 cop_feat_caps = &caps->caps; 341 342 if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > 343 atomic_read(&cop_feat_caps->nr_total_credits)) { 344 pr_err_ratelimited("Credits are not available to allocate window\n"); 345 rc = -EINVAL; 346 goto out; 347 } 348 349 if (vas_id == -1) { 350 /* 351 * The user space is requesting to allocate a window on 352 * a VAS instance where the process is executing. 353 * On PowerVM, domain values are passed to the hypervisor 354 * to select VAS instance. Useful if the process is 355 * affinity to NUMA node. 356 * The hypervisor selects VAS instance if 357 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 358 * The h_allocate_vas_window hcall is defined to take a 359 * domain values as specified by h_home_node_associativity, 360 * So no unpacking needs to be done. 361 */ 362 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 363 VPHN_FLAG_VCPU, hard_smp_processor_id()); 364 if (rc != H_SUCCESS) { 365 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 366 goto out; 367 } 368 } 369 370 txwin->pid = mfspr(SPRN_PID); 371 372 /* 373 * Allocate / Deallocate window hcalls and setup / free IRQs 374 * have to be protected with mutex. 375 * Open VAS window: Allocate window hcall and setup IRQ 376 * Close VAS window: Deallocate window hcall and free IRQ 377 * The hypervisor waits until all NX requests are 378 * completed before closing the window. So expects OS 379 * to handle NX faults, means IRQ can be freed only 380 * after the deallocate window hcall is returned. 381 * So once the window is closed with deallocate hcall before 382 * the IRQ is freed, it can be assigned to new allocate 383 * hcall with the same fault IRQ by the hypervisor. It can 384 * result in setup IRQ fail for the new window since the 385 * same fault IRQ is not freed by the OS before. 386 */ 387 mutex_lock(&vas_pseries_mutex); 388 if (migration_in_progress) { 389 rc = -EBUSY; 390 } else { 391 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 392 cop_feat_caps->win_type); 393 if (!rc) 394 caps->nr_open_wins_progress++; 395 } 396 397 mutex_unlock(&vas_pseries_mutex); 398 if (rc) 399 goto out; 400 401 /* 402 * Modify window and it is ready to use. 403 */ 404 rc = h_modify_vas_window(txwin); 405 if (!rc) 406 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 407 if (rc) 408 goto out_free; 409 410 txwin->win_type = cop_feat_caps->win_type; 411 412 /* 413 * The migration SUSPEND thread sets migration_in_progress and 414 * closes all open windows from the list. But the window is 415 * added to the list after open and modify HCALLs. So possible 416 * that migration_in_progress is set before modify HCALL which 417 * may cause some windows are still open when the hypervisor 418 * initiates the migration. 419 * So checks the migration_in_progress flag again and close all 420 * open windows. 421 * 422 * Possible to lose the acquired credit with DLPAR core 423 * removal after the window is opened. So if there are any 424 * closed windows (means with lost credits), do not give new 425 * window to user space. New windows will be opened only 426 * after the existing windows are reopened when credits are 427 * available. 428 */ 429 mutex_lock(&vas_pseries_mutex); 430 if (!caps->nr_close_wins && !migration_in_progress) { 431 list_add(&txwin->win_list, &caps->list); 432 caps->nr_open_windows++; 433 caps->nr_open_wins_progress--; 434 mutex_unlock(&vas_pseries_mutex); 435 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 436 return &txwin->vas_win; 437 } 438 mutex_unlock(&vas_pseries_mutex); 439 440 put_vas_user_win_ref(&txwin->vas_win.task_ref); 441 rc = -EBUSY; 442 pr_err_ratelimited("No credit is available to allocate window\n"); 443 444 out_free: 445 /* 446 * Window is not operational. Free IRQ before closing 447 * window so that do not have to hold mutex. 448 */ 449 free_irq_setup(txwin); 450 h_deallocate_vas_window(txwin->vas_win.winid); 451 /* 452 * Hold mutex and reduce nr_open_wins_progress counter. 453 */ 454 mutex_lock(&vas_pseries_mutex); 455 caps->nr_open_wins_progress--; 456 mutex_unlock(&vas_pseries_mutex); 457 out: 458 atomic_dec(&cop_feat_caps->nr_used_credits); 459 kfree(txwin); 460 return ERR_PTR(rc); 461 } 462 463 static u64 vas_paste_address(struct vas_window *vwin) 464 { 465 struct pseries_vas_window *win; 466 467 win = container_of(vwin, struct pseries_vas_window, vas_win); 468 return win->win_addr; 469 } 470 471 static int deallocate_free_window(struct pseries_vas_window *win) 472 { 473 int rc = 0; 474 475 /* 476 * The hypervisor waits for all requests including faults 477 * are processed before closing the window - Means all 478 * credits have to be returned. In the case of fault 479 * request, a credit is returned after OS issues 480 * H_GET_NX_FAULT hcall. 481 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 482 * hcall. 483 */ 484 rc = h_deallocate_vas_window(win->vas_win.winid); 485 if (!rc) 486 free_irq_setup(win); 487 488 return rc; 489 } 490 491 static int vas_deallocate_window(struct vas_window *vwin) 492 { 493 struct pseries_vas_window *win; 494 struct vas_cop_feat_caps *caps; 495 int rc = 0; 496 497 if (!vwin) 498 return -EINVAL; 499 500 win = container_of(vwin, struct pseries_vas_window, vas_win); 501 502 /* Should not happen */ 503 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 504 pr_err("Window (%u): Invalid window type %u\n", 505 vwin->winid, win->win_type); 506 return -EINVAL; 507 } 508 509 caps = &vascaps[win->win_type].caps; 510 mutex_lock(&vas_pseries_mutex); 511 /* 512 * VAS window is already closed in the hypervisor when 513 * lost the credit or with migration. So just remove the entry 514 * from the list, remove task references and free vas_window 515 * struct. 516 */ 517 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 518 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 519 rc = deallocate_free_window(win); 520 if (rc) { 521 mutex_unlock(&vas_pseries_mutex); 522 return rc; 523 } 524 } else 525 vascaps[win->win_type].nr_close_wins--; 526 527 list_del(&win->win_list); 528 atomic_dec(&caps->nr_used_credits); 529 vascaps[win->win_type].nr_open_windows--; 530 mutex_unlock(&vas_pseries_mutex); 531 532 mm_context_remove_vas_window(vwin->task_ref.mm); 533 put_vas_user_win_ref(&vwin->task_ref); 534 535 kfree(win); 536 return 0; 537 } 538 539 static const struct vas_user_win_ops vops_pseries = { 540 .open_win = vas_allocate_window, /* Open and configure window */ 541 .paste_addr = vas_paste_address, /* To do copy/paste */ 542 .close_win = vas_deallocate_window, /* Close window */ 543 }; 544 545 /* 546 * Supporting only nx-gzip coprocessor type now, but this API code 547 * extended to other coprocessor types later. 548 */ 549 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 550 const char *name) 551 { 552 if (!copypaste_feat) 553 return -ENOTSUPP; 554 555 return vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 556 } 557 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 558 559 void vas_unregister_api_pseries(void) 560 { 561 vas_unregister_coproc_api(); 562 } 563 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 564 565 /* 566 * Get the specific capabilities based on the feature type. 567 * Right now supports GZIP default and GZIP QoS capabilities. 568 */ 569 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 570 struct hv_vas_cop_feat_caps *hv_caps) 571 { 572 struct vas_cop_feat_caps *caps; 573 struct vas_caps *vcaps; 574 int rc = 0; 575 576 vcaps = &vascaps[type]; 577 memset(vcaps, 0, sizeof(*vcaps)); 578 INIT_LIST_HEAD(&vcaps->list); 579 580 vcaps->feat = feat; 581 caps = &vcaps->caps; 582 583 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 584 (u64)virt_to_phys(hv_caps)); 585 if (rc) 586 return rc; 587 588 caps->user_mode = hv_caps->user_mode; 589 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 590 pr_err("User space COPY/PASTE is not supported\n"); 591 return -ENOTSUPP; 592 } 593 594 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 595 caps->win_type = hv_caps->win_type; 596 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 597 pr_err("Unsupported window type %u\n", caps->win_type); 598 return -EINVAL; 599 } 600 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 601 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 602 atomic_set(&caps->nr_total_credits, 603 be16_to_cpu(hv_caps->target_lpar_creds)); 604 if (feat == VAS_GZIP_DEF_FEAT) { 605 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 606 607 if (caps->max_win_creds < DEF_WIN_CREDS) { 608 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 609 DEF_WIN_CREDS, caps->max_win_creds); 610 return -EINVAL; 611 } 612 } 613 614 rc = sysfs_add_vas_caps(caps); 615 if (rc) 616 return rc; 617 618 copypaste_feat = true; 619 620 return 0; 621 } 622 623 /* 624 * VAS windows can be closed due to lost credits when the core is 625 * removed. So reopen them if credits are available due to DLPAR 626 * core add and set the window active status. When NX sees the page 627 * fault on the unmapped paste address, the kernel handles the fault 628 * by setting the remapping to new paste address if the window is 629 * active. 630 */ 631 static int reconfig_open_windows(struct vas_caps *vcaps, int creds, 632 bool migrate) 633 { 634 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 635 struct vas_cop_feat_caps *caps = &vcaps->caps; 636 struct pseries_vas_window *win = NULL, *tmp; 637 int rc, mv_ents = 0; 638 int flag; 639 640 /* 641 * Nothing to do if there are no closed windows. 642 */ 643 if (!vcaps->nr_close_wins) 644 return 0; 645 646 /* 647 * For the core removal, the hypervisor reduces the credits 648 * assigned to the LPAR and the kernel closes VAS windows 649 * in the hypervisor depends on reduced credits. The kernel 650 * uses LIFO (the last windows that are opened will be closed 651 * first) and expects to open in the same order when credits 652 * are available. 653 * For example, 40 windows are closed when the LPAR lost 2 cores 654 * (dedicated). If 1 core is added, this LPAR can have 20 more 655 * credits. It means the kernel can reopen 20 windows. So move 656 * 20 entries in the VAS windows lost and reopen next 20 windows. 657 * For partition migration, reopen all windows that are closed 658 * during resume. 659 */ 660 if ((vcaps->nr_close_wins > creds) && !migrate) 661 mv_ents = vcaps->nr_close_wins - creds; 662 663 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { 664 if (!mv_ents) 665 break; 666 667 mv_ents--; 668 } 669 670 /* 671 * Open windows if they are closed only with migration or 672 * DLPAR (lost credit) before. 673 */ 674 if (migrate) 675 flag = VAS_WIN_MIGRATE_CLOSE; 676 else 677 flag = VAS_WIN_NO_CRED_CLOSE; 678 679 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { 680 /* 681 * This window is closed with DLPAR and migration events. 682 * So reopen the window with the last event. 683 * The user space is not suspended with the current 684 * migration notifier. So the user space can issue DLPAR 685 * CPU hotplug while migration in progress. In this case 686 * this window will be opened with the last event. 687 */ 688 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 689 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 690 win->vas_win.status &= ~flag; 691 continue; 692 } 693 694 /* 695 * Nothing to do on this window if it is not closed 696 * with this flag 697 */ 698 if (!(win->vas_win.status & flag)) 699 continue; 700 701 rc = allocate_setup_window(win, (u64 *)&domain[0], 702 caps->win_type); 703 if (rc) 704 return rc; 705 706 rc = h_modify_vas_window(win); 707 if (rc) 708 goto out; 709 710 mutex_lock(&win->vas_win.task_ref.mmap_mutex); 711 /* 712 * Set window status to active 713 */ 714 win->vas_win.status &= ~flag; 715 mutex_unlock(&win->vas_win.task_ref.mmap_mutex); 716 win->win_type = caps->win_type; 717 if (!--vcaps->nr_close_wins) 718 break; 719 } 720 721 return 0; 722 out: 723 /* 724 * Window modify HCALL failed. So close the window to the 725 * hypervisor and return. 726 */ 727 free_irq_setup(win); 728 h_deallocate_vas_window(win->vas_win.winid); 729 return rc; 730 } 731 732 /* 733 * The hypervisor reduces the available credits if the LPAR lost core. It 734 * means the excessive windows should not be active and the user space 735 * should not be using these windows to send compression requests to NX. 736 * So the kernel closes the excessive windows and unmap the paste address 737 * such that the user space receives paste instruction failure. Then up to 738 * the user space to fall back to SW compression and manage with the 739 * existing windows. 740 */ 741 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, 742 bool migrate) 743 { 744 struct pseries_vas_window *win, *tmp; 745 struct vas_user_win_ref *task_ref; 746 struct vm_area_struct *vma; 747 int rc = 0, flag; 748 749 if (migrate) 750 flag = VAS_WIN_MIGRATE_CLOSE; 751 else 752 flag = VAS_WIN_NO_CRED_CLOSE; 753 754 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { 755 /* 756 * This window is already closed due to lost credit 757 * or for migration before. Go for next window. 758 * For migration, nothing to do since this window 759 * closed for DLPAR and will be reopened even on 760 * the destination system with other DLPAR operation. 761 */ 762 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || 763 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { 764 win->vas_win.status |= flag; 765 continue; 766 } 767 768 task_ref = &win->vas_win.task_ref; 769 /* 770 * VAS mmap (coproc_mmap()) and its fault handler 771 * (vas_mmap_fault()) are called after holding mmap lock. 772 * So hold mmap mutex after mmap_lock to avoid deadlock. 773 */ 774 mmap_write_lock(task_ref->mm); 775 mutex_lock(&task_ref->mmap_mutex); 776 vma = task_ref->vma; 777 /* 778 * Number of available credits are reduced, So select 779 * and close windows. 780 */ 781 win->vas_win.status |= flag; 782 783 /* 784 * vma is set in the original mapping. But this mapping 785 * is done with mmap() after the window is opened with ioctl. 786 * so we may not see the original mapping if the core remove 787 * is done before the original mmap() and after the ioctl. 788 */ 789 if (vma) 790 zap_vma_pages(vma); 791 792 mutex_unlock(&task_ref->mmap_mutex); 793 mmap_write_unlock(task_ref->mm); 794 /* 795 * Close VAS window in the hypervisor, but do not 796 * free vas_window struct since it may be reused 797 * when the credit is available later (DLPAR with 798 * adding cores). This struct will be used 799 * later when the process issued with close(FD). 800 */ 801 rc = deallocate_free_window(win); 802 /* 803 * This failure is from the hypervisor. 804 * No way to stop migration for these failures. 805 * So ignore error and continue closing other windows. 806 */ 807 if (rc && !migrate) 808 return rc; 809 810 vcap->nr_close_wins++; 811 812 /* 813 * For migration, do not depend on lpar_creds in case if 814 * mismatch with the hypervisor value (should not happen). 815 * So close all active windows in the list and will be 816 * reopened windows based on the new lpar_creds on the 817 * destination system during resume. 818 */ 819 if (!migrate && !--excess_creds) 820 break; 821 } 822 823 return 0; 824 } 825 826 /* 827 * Get new VAS capabilities when the core add/removal configuration 828 * changes. Reconfig window configurations based on the credits 829 * availability from this new capabilities. 830 */ 831 int vas_reconfig_capabilties(u8 type, int new_nr_creds) 832 { 833 struct vas_cop_feat_caps *caps; 834 int old_nr_creds; 835 struct vas_caps *vcaps; 836 int rc = 0, nr_active_wins; 837 838 if (type >= VAS_MAX_FEAT_TYPE) { 839 pr_err("Invalid credit type %d\n", type); 840 return -EINVAL; 841 } 842 843 vcaps = &vascaps[type]; 844 caps = &vcaps->caps; 845 846 mutex_lock(&vas_pseries_mutex); 847 848 old_nr_creds = atomic_read(&caps->nr_total_credits); 849 850 atomic_set(&caps->nr_total_credits, new_nr_creds); 851 /* 852 * The total number of available credits may be decreased or 853 * increased with DLPAR operation. Means some windows have to be 854 * closed / reopened. Hold the vas_pseries_mutex so that the 855 * user space can not open new windows. 856 */ 857 if (old_nr_creds < new_nr_creds) { 858 /* 859 * If the existing target credits is less than the new 860 * target, reopen windows if they are closed due to 861 * the previous DLPAR (core removal). 862 */ 863 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, 864 false); 865 } else { 866 /* 867 * # active windows is more than new LPAR available 868 * credits. So close the excessive windows. 869 * On pseries, each window will have 1 credit. 870 */ 871 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; 872 if (nr_active_wins > new_nr_creds) 873 rc = reconfig_close_windows(vcaps, 874 nr_active_wins - new_nr_creds, 875 false); 876 } 877 878 mutex_unlock(&vas_pseries_mutex); 879 return rc; 880 } 881 882 int pseries_vas_dlpar_cpu(void) 883 { 884 int new_nr_creds, rc; 885 886 /* 887 * NX-GZIP is not enabled. Nothing to do for DLPAR event 888 */ 889 if (!copypaste_feat) 890 return 0; 891 892 893 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 894 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, 895 (u64)virt_to_phys(&hv_cop_caps)); 896 if (!rc) { 897 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 898 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds); 899 } 900 901 if (rc) 902 pr_err("Failed reconfig VAS capabilities with DLPAR\n"); 903 904 return rc; 905 } 906 907 /* 908 * Total number of default credits available (target_credits) 909 * in LPAR depends on number of cores configured. It varies based on 910 * whether processors are in shared mode or dedicated mode. 911 * Get the notifier when CPU configuration is changed with DLPAR 912 * operation so that get the new target_credits (vas default capabilities) 913 * and then update the existing windows usage if needed. 914 */ 915 static int pseries_vas_notifier(struct notifier_block *nb, 916 unsigned long action, void *data) 917 { 918 struct of_reconfig_data *rd = data; 919 struct device_node *dn = rd->dn; 920 const __be32 *intserv = NULL; 921 int len; 922 923 /* 924 * For shared CPU partition, the hypervisor assigns total credits 925 * based on entitled core capacity. So updating VAS windows will 926 * be called from lparcfg_write(). 927 */ 928 if (is_shared_processor()) 929 return NOTIFY_OK; 930 931 if ((action == OF_RECONFIG_ATTACH_NODE) || 932 (action == OF_RECONFIG_DETACH_NODE)) 933 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", 934 &len); 935 /* 936 * Processor config is not changed 937 */ 938 if (!intserv) 939 return NOTIFY_OK; 940 941 return pseries_vas_dlpar_cpu(); 942 } 943 944 static struct notifier_block pseries_vas_nb = { 945 .notifier_call = pseries_vas_notifier, 946 }; 947 948 /* 949 * For LPM, all windows have to be closed on the source partition 950 * before migration and reopen them on the destination partition 951 * after migration. So closing windows during suspend and 952 * reopen them during resume. 953 */ 954 int vas_migration_handler(int action) 955 { 956 struct vas_cop_feat_caps *caps; 957 int old_nr_creds, new_nr_creds = 0; 958 struct vas_caps *vcaps; 959 int i, rc = 0; 960 961 pr_info("VAS migration event %d\n", action); 962 963 /* 964 * NX-GZIP is not enabled. Nothing to do for migration. 965 */ 966 if (!copypaste_feat) 967 return rc; 968 969 if (action == VAS_SUSPEND) 970 migration_in_progress = true; 971 else 972 migration_in_progress = false; 973 974 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { 975 vcaps = &vascaps[i]; 976 caps = &vcaps->caps; 977 old_nr_creds = atomic_read(&caps->nr_total_credits); 978 979 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 980 vcaps->feat, 981 (u64)virt_to_phys(&hv_cop_caps)); 982 if (!rc) { 983 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 984 /* 985 * Should not happen. But incase print messages, close 986 * all windows in the list during suspend and reopen 987 * windows based on new lpar_creds on the destination 988 * system. 989 */ 990 if (old_nr_creds != new_nr_creds) { 991 pr_err("Target credits mismatch with the hypervisor\n"); 992 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", 993 action, old_nr_creds, new_nr_creds); 994 pr_err("Used creds: %d, Active creds: %d\n", 995 atomic_read(&caps->nr_used_credits), 996 vcaps->nr_open_windows - vcaps->nr_close_wins); 997 } 998 } else { 999 pr_err("state(%d): Get VAS capabilities failed with %d\n", 1000 action, rc); 1001 /* 1002 * We can not stop migration with the current lpm 1003 * implementation. So continue closing all windows in 1004 * the list (during suspend) and return without 1005 * opening windows (during resume) if VAS capabilities 1006 * HCALL failed. 1007 */ 1008 if (action == VAS_RESUME) 1009 goto out; 1010 } 1011 1012 switch (action) { 1013 case VAS_SUSPEND: 1014 mutex_lock(&vas_pseries_mutex); 1015 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, 1016 true); 1017 /* 1018 * Windows are included in the list after successful 1019 * open. So wait for closing these in-progress open 1020 * windows in vas_allocate_window() which will be 1021 * done if the migration_in_progress is set. 1022 */ 1023 while (vcaps->nr_open_wins_progress) { 1024 mutex_unlock(&vas_pseries_mutex); 1025 msleep(10); 1026 mutex_lock(&vas_pseries_mutex); 1027 } 1028 mutex_unlock(&vas_pseries_mutex); 1029 break; 1030 case VAS_RESUME: 1031 mutex_lock(&vas_pseries_mutex); 1032 atomic_set(&caps->nr_total_credits, new_nr_creds); 1033 rc = reconfig_open_windows(vcaps, new_nr_creds, true); 1034 mutex_unlock(&vas_pseries_mutex); 1035 break; 1036 default: 1037 /* should not happen */ 1038 pr_err("Invalid migration action %d\n", action); 1039 rc = -EINVAL; 1040 goto out; 1041 } 1042 1043 /* 1044 * Ignore errors during suspend and return for resume. 1045 */ 1046 if (rc && (action == VAS_RESUME)) 1047 goto out; 1048 } 1049 1050 pr_info("VAS migration event (%d) successful\n", action); 1051 1052 out: 1053 return rc; 1054 } 1055 1056 static int __init pseries_vas_init(void) 1057 { 1058 struct hv_vas_all_caps *hv_caps; 1059 int rc = 0; 1060 1061 /* 1062 * Linux supports user space COPY/PASTE only with Radix 1063 */ 1064 if (!radix_enabled()) { 1065 copypaste_feat = false; 1066 pr_err("API is supported only with radix page tables\n"); 1067 return -ENOTSUPP; 1068 } 1069 1070 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 1071 if (!hv_caps) 1072 return -ENOMEM; 1073 /* 1074 * Get VAS overall capabilities by passing 0 to feature type. 1075 */ 1076 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 1077 (u64)virt_to_phys(hv_caps)); 1078 if (rc) 1079 goto out; 1080 1081 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 1082 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 1083 1084 sysfs_pseries_vas_init(&caps_all); 1085 1086 /* 1087 * QOS capabilities available 1088 */ 1089 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 1090 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 1091 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); 1092 1093 if (rc) 1094 goto out; 1095 } 1096 /* 1097 * Default capabilities available 1098 */ 1099 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) 1100 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 1101 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); 1102 1103 if (!rc && copypaste_feat) { 1104 if (firmware_has_feature(FW_FEATURE_LPAR)) 1105 of_reconfig_notifier_register(&pseries_vas_nb); 1106 1107 pr_info("GZIP feature is available\n"); 1108 } else { 1109 /* 1110 * Should not happen, but only when get default 1111 * capabilities HCALL failed. So disable copy paste 1112 * feature. 1113 */ 1114 copypaste_feat = false; 1115 } 1116 1117 out: 1118 kfree(hv_caps); 1119 return rc; 1120 } 1121 machine_device_initcall(pseries, pseries_vas_init); 1122