1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/firmware.h> 20 #include <asm/vphn.h> 21 #include <asm/vas.h> 22 #include "vas.h" 23 24 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 25 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 26 /* The hypervisor allows one credit per window right now */ 27 #define DEF_WIN_CREDS 1 28 29 static struct vas_all_caps caps_all; 30 static bool copypaste_feat; 31 static struct hv_vas_cop_feat_caps hv_cop_caps; 32 33 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 34 static DEFINE_MUTEX(vas_pseries_mutex); 35 static bool migration_in_progress; 36 37 static long hcall_return_busy_check(long rc) 38 { 39 /* Check if we are stalled for some time */ 40 if (H_IS_LONG_BUSY(rc)) { 41 unsigned int ms; 42 /* 43 * Allocate, Modify and Deallocate HCALLs returns 44 * H_LONG_BUSY_ORDER_1_MSEC or H_LONG_BUSY_ORDER_10_MSEC 45 * for the long delay. So the sleep time should always 46 * be either 1 or 10msecs, but in case if the HCALL 47 * returns the long delay > 10 msecs, clamp the sleep 48 * time to 10msecs. 49 */ 50 ms = clamp(get_longbusy_msecs(rc), 1, 10); 51 52 /* 53 * msleep() will often sleep at least 20 msecs even 54 * though the hypervisor suggests that the OS reissue 55 * HCALLs after 1 or 10msecs. Also the delay hint from 56 * the HCALL is just a suggestion. So OK to pause for 57 * less time than the hinted delay. Use usleep_range() 58 * to ensure we don't sleep much longer than actually 59 * needed. 60 */ 61 usleep_range(ms * (USEC_PER_MSEC / 10), ms * USEC_PER_MSEC); 62 rc = H_BUSY; 63 } else if (rc == H_BUSY) { 64 cond_resched(); 65 } 66 67 return rc; 68 } 69 70 /* 71 * Allocate VAS window hcall 72 */ 73 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 74 u8 wintype, u16 credits) 75 { 76 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 77 long rc; 78 79 do { 80 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 81 credits, domain[0], domain[1], domain[2], 82 domain[3], domain[4], domain[5]); 83 84 rc = hcall_return_busy_check(rc); 85 } while (rc == H_BUSY); 86 87 if (rc == H_SUCCESS) { 88 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 89 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 90 return -ENOTSUPP; 91 } 92 win->vas_win.winid = retbuf[0]; 93 win->win_addr = retbuf[1]; 94 win->complete_irq = retbuf[2]; 95 win->fault_irq = retbuf[3]; 96 return 0; 97 } 98 99 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 100 rc, wintype, credits); 101 102 return -EIO; 103 } 104 105 /* 106 * Deallocate VAS window hcall. 107 */ 108 static int h_deallocate_vas_window(u64 winid) 109 { 110 long rc; 111 112 do { 113 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 114 115 rc = hcall_return_busy_check(rc); 116 } while (rc == H_BUSY); 117 118 if (rc == H_SUCCESS) 119 return 0; 120 121 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 122 rc, winid); 123 return -EIO; 124 } 125 126 /* 127 * Modify VAS window. 128 * After the window is opened with allocate window hcall, configure it 129 * with flags and LPAR PID before using. 130 */ 131 static int h_modify_vas_window(struct pseries_vas_window *win) 132 { 133 long rc; 134 135 /* 136 * AMR value is not supported in Linux VAS implementation. 137 * The hypervisor ignores it if 0 is passed. 138 */ 139 do { 140 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 141 win->vas_win.winid, win->pid, 0, 142 VAS_MOD_WIN_FLAGS, 0); 143 144 rc = hcall_return_busy_check(rc); 145 } while (rc == H_BUSY); 146 147 if (rc == H_SUCCESS) 148 return 0; 149 150 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", 151 rc, win->vas_win.winid, win->pid); 152 return -EIO; 153 } 154 155 /* 156 * This hcall is used to determine the capabilities from the hypervisor. 157 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 158 * @query_type: If 0 is passed, the hypervisor returns the overall 159 * capabilities which provides all feature(s) that are 160 * available. Then query the hypervisor to get the 161 * corresponding capabilities for the specific feature. 162 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 163 * and VAS GZIP Default capabilities. 164 * H_QUERY_NX_CAPABILITIES provides NX GZIP 165 * capabilities. 166 * @result: Return buffer to save capabilities. 167 */ 168 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 169 { 170 long rc; 171 172 rc = plpar_hcall_norets(hcall, query_type, result); 173 174 if (rc == H_SUCCESS) 175 return 0; 176 177 /* H_FUNCTION means HV does not support VAS so don't print an error */ 178 if (rc != H_FUNCTION) { 179 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 180 (hcall == H_QUERY_VAS_CAPABILITIES) ? 181 "H_QUERY_VAS_CAPABILITIES" : 182 "H_QUERY_NX_CAPABILITIES", 183 rc, query_type, result); 184 } 185 186 return -EIO; 187 } 188 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 189 190 /* 191 * hcall to get fault CRB from the hypervisor. 192 */ 193 static int h_get_nx_fault(u32 winid, u64 buffer) 194 { 195 long rc; 196 197 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 198 199 if (rc == H_SUCCESS) 200 return 0; 201 202 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 203 rc, winid, buffer); 204 return -EIO; 205 206 } 207 208 /* 209 * Handle the fault interrupt. 210 * When the fault interrupt is received for each window, query the 211 * hypervisor to get the fault CRB on the specific fault. Then 212 * process the CRB by updating CSB or send signal if the user space 213 * CSB is invalid. 214 * Note: The hypervisor forwards an interrupt for each fault request. 215 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 216 */ 217 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 218 { 219 struct pseries_vas_window *txwin = data; 220 struct coprocessor_request_block crb; 221 struct vas_user_win_ref *tsk_ref; 222 int rc; 223 224 while (atomic_read(&txwin->pending_faults)) { 225 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 226 if (!rc) { 227 tsk_ref = &txwin->vas_win.task_ref; 228 vas_dump_crb(&crb); 229 vas_update_csb(&crb, tsk_ref); 230 } 231 atomic_dec(&txwin->pending_faults); 232 } 233 234 return IRQ_HANDLED; 235 } 236 237 /* 238 * irq_default_primary_handler() can be used only with IRQF_ONESHOT 239 * which disables IRQ before executing the thread handler and enables 240 * it after. But this disabling interrupt sets the VAS IRQ OFF 241 * state in the hypervisor. If the NX generates fault interrupt 242 * during this window, the hypervisor will not deliver this 243 * interrupt to the LPAR. So use VAS specific IRQ handler instead 244 * of calling the default primary handler. 245 */ 246 static irqreturn_t pseries_vas_irq_handler(int irq, void *data) 247 { 248 struct pseries_vas_window *txwin = data; 249 250 /* 251 * The thread handler will process this interrupt if it is 252 * already running. 253 */ 254 atomic_inc(&txwin->pending_faults); 255 256 return IRQ_WAKE_THREAD; 257 } 258 259 /* 260 * Allocate window and setup IRQ mapping. 261 */ 262 static int allocate_setup_window(struct pseries_vas_window *txwin, 263 u64 *domain, u8 wintype) 264 { 265 int rc; 266 267 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 268 if (rc) 269 return rc; 270 /* 271 * On PowerVM, the hypervisor setup and forwards the fault 272 * interrupt per window. So the IRQ setup and fault handling 273 * will be done for each open window separately. 274 */ 275 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 276 if (!txwin->fault_virq) { 277 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 278 rc = -EINVAL; 279 goto out_win; 280 } 281 282 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 283 txwin->vas_win.winid); 284 if (!txwin->name) { 285 rc = -ENOMEM; 286 goto out_irq; 287 } 288 289 rc = request_threaded_irq(txwin->fault_virq, 290 pseries_vas_irq_handler, 291 pseries_vas_fault_thread_fn, 0, 292 txwin->name, txwin); 293 if (rc) { 294 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 295 txwin->vas_win.winid, txwin->fault_virq, rc); 296 goto out_free; 297 } 298 299 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 300 301 return 0; 302 out_free: 303 kfree(txwin->name); 304 out_irq: 305 irq_dispose_mapping(txwin->fault_virq); 306 out_win: 307 h_deallocate_vas_window(txwin->vas_win.winid); 308 return rc; 309 } 310 311 static inline void free_irq_setup(struct pseries_vas_window *txwin) 312 { 313 free_irq(txwin->fault_virq, txwin); 314 kfree(txwin->name); 315 irq_dispose_mapping(txwin->fault_virq); 316 } 317 318 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 319 enum vas_cop_type cop_type) 320 { 321 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 322 struct vas_cop_feat_caps *cop_feat_caps; 323 struct vas_caps *caps; 324 struct pseries_vas_window *txwin; 325 int rc; 326 327 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 328 if (!txwin) 329 return ERR_PTR(-ENOMEM); 330 331 /* 332 * A VAS window can have many credits which means that many 333 * requests can be issued simultaneously. But the hypervisor 334 * restricts one credit per window. 335 * The hypervisor introduces 2 different types of credits: 336 * Default credit type (Uses normal priority FIFO): 337 * A limited number of credits are assigned to partitions 338 * based on processor entitlement. But these credits may be 339 * over-committed on a system depends on whether the CPUs 340 * are in shared or dedicated modes - that is, more requests 341 * may be issued across the system than NX can service at 342 * once which can result in paste command failure (RMA_busy). 343 * Then the process has to resend requests or fall-back to 344 * SW compression. 345 * Quality of Service (QoS) credit type (Uses high priority FIFO): 346 * To avoid NX HW contention, the system admins can assign 347 * QoS credits for each LPAR so that this partition is 348 * guaranteed access to NX resources. These credits are 349 * assigned to partitions via the HMC. 350 * Refer PAPR for more information. 351 * 352 * Allocate window with QoS credits if user requested. Otherwise 353 * default credits are used. 354 */ 355 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 356 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 357 else 358 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 359 360 cop_feat_caps = &caps->caps; 361 362 if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > 363 atomic_read(&cop_feat_caps->nr_total_credits)) { 364 pr_err_ratelimited("Credits are not available to allocate window\n"); 365 rc = -EINVAL; 366 goto out; 367 } 368 369 if (vas_id == -1) { 370 /* 371 * The user space is requesting to allocate a window on 372 * a VAS instance where the process is executing. 373 * On PowerVM, domain values are passed to the hypervisor 374 * to select VAS instance. Useful if the process is 375 * affinity to NUMA node. 376 * The hypervisor selects VAS instance if 377 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 378 * The h_allocate_vas_window hcall is defined to take a 379 * domain values as specified by h_home_node_associativity, 380 * So no unpacking needs to be done. 381 */ 382 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 383 VPHN_FLAG_VCPU, hard_smp_processor_id()); 384 if (rc != H_SUCCESS) { 385 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 386 goto out; 387 } 388 } 389 390 txwin->pid = mfspr(SPRN_PID); 391 392 /* 393 * Allocate / Deallocate window hcalls and setup / free IRQs 394 * have to be protected with mutex. 395 * Open VAS window: Allocate window hcall and setup IRQ 396 * Close VAS window: Deallocate window hcall and free IRQ 397 * The hypervisor waits until all NX requests are 398 * completed before closing the window. So expects OS 399 * to handle NX faults, means IRQ can be freed only 400 * after the deallocate window hcall is returned. 401 * So once the window is closed with deallocate hcall before 402 * the IRQ is freed, it can be assigned to new allocate 403 * hcall with the same fault IRQ by the hypervisor. It can 404 * result in setup IRQ fail for the new window since the 405 * same fault IRQ is not freed by the OS before. 406 */ 407 mutex_lock(&vas_pseries_mutex); 408 if (migration_in_progress) { 409 rc = -EBUSY; 410 } else { 411 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 412 cop_feat_caps->win_type); 413 if (!rc) 414 caps->nr_open_wins_progress++; 415 } 416 417 mutex_unlock(&vas_pseries_mutex); 418 if (rc) 419 goto out; 420 421 /* 422 * Modify window and it is ready to use. 423 */ 424 rc = h_modify_vas_window(txwin); 425 if (!rc) 426 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 427 if (rc) 428 goto out_free; 429 430 txwin->win_type = cop_feat_caps->win_type; 431 432 /* 433 * The migration SUSPEND thread sets migration_in_progress and 434 * closes all open windows from the list. But the window is 435 * added to the list after open and modify HCALLs. So possible 436 * that migration_in_progress is set before modify HCALL which 437 * may cause some windows are still open when the hypervisor 438 * initiates the migration. 439 * So checks the migration_in_progress flag again and close all 440 * open windows. 441 * 442 * Possible to lose the acquired credit with DLPAR core 443 * removal after the window is opened. So if there are any 444 * closed windows (means with lost credits), do not give new 445 * window to user space. New windows will be opened only 446 * after the existing windows are reopened when credits are 447 * available. 448 */ 449 mutex_lock(&vas_pseries_mutex); 450 if (!caps->nr_close_wins && !migration_in_progress) { 451 list_add(&txwin->win_list, &caps->list); 452 caps->nr_open_windows++; 453 caps->nr_open_wins_progress--; 454 mutex_unlock(&vas_pseries_mutex); 455 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 456 return &txwin->vas_win; 457 } 458 mutex_unlock(&vas_pseries_mutex); 459 460 put_vas_user_win_ref(&txwin->vas_win.task_ref); 461 rc = -EBUSY; 462 pr_err_ratelimited("No credit is available to allocate window\n"); 463 464 out_free: 465 /* 466 * Window is not operational. Free IRQ before closing 467 * window so that do not have to hold mutex. 468 */ 469 free_irq_setup(txwin); 470 h_deallocate_vas_window(txwin->vas_win.winid); 471 /* 472 * Hold mutex and reduce nr_open_wins_progress counter. 473 */ 474 mutex_lock(&vas_pseries_mutex); 475 caps->nr_open_wins_progress--; 476 mutex_unlock(&vas_pseries_mutex); 477 out: 478 atomic_dec(&cop_feat_caps->nr_used_credits); 479 kfree(txwin); 480 return ERR_PTR(rc); 481 } 482 483 static u64 vas_paste_address(struct vas_window *vwin) 484 { 485 struct pseries_vas_window *win; 486 487 win = container_of(vwin, struct pseries_vas_window, vas_win); 488 return win->win_addr; 489 } 490 491 static int deallocate_free_window(struct pseries_vas_window *win) 492 { 493 int rc = 0; 494 495 /* 496 * The hypervisor waits for all requests including faults 497 * are processed before closing the window - Means all 498 * credits have to be returned. In the case of fault 499 * request, a credit is returned after OS issues 500 * H_GET_NX_FAULT hcall. 501 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 502 * hcall. 503 */ 504 rc = h_deallocate_vas_window(win->vas_win.winid); 505 if (!rc) 506 free_irq_setup(win); 507 508 return rc; 509 } 510 511 static int vas_deallocate_window(struct vas_window *vwin) 512 { 513 struct pseries_vas_window *win; 514 struct vas_cop_feat_caps *caps; 515 int rc = 0; 516 517 if (!vwin) 518 return -EINVAL; 519 520 win = container_of(vwin, struct pseries_vas_window, vas_win); 521 522 /* Should not happen */ 523 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 524 pr_err("Window (%u): Invalid window type %u\n", 525 vwin->winid, win->win_type); 526 return -EINVAL; 527 } 528 529 caps = &vascaps[win->win_type].caps; 530 mutex_lock(&vas_pseries_mutex); 531 /* 532 * VAS window is already closed in the hypervisor when 533 * lost the credit or with migration. So just remove the entry 534 * from the list, remove task references and free vas_window 535 * struct. 536 */ 537 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 538 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 539 rc = deallocate_free_window(win); 540 if (rc) { 541 mutex_unlock(&vas_pseries_mutex); 542 return rc; 543 } 544 } else 545 vascaps[win->win_type].nr_close_wins--; 546 547 list_del(&win->win_list); 548 atomic_dec(&caps->nr_used_credits); 549 vascaps[win->win_type].nr_open_windows--; 550 mutex_unlock(&vas_pseries_mutex); 551 552 mm_context_remove_vas_window(vwin->task_ref.mm); 553 put_vas_user_win_ref(&vwin->task_ref); 554 555 kfree(win); 556 return 0; 557 } 558 559 static const struct vas_user_win_ops vops_pseries = { 560 .open_win = vas_allocate_window, /* Open and configure window */ 561 .paste_addr = vas_paste_address, /* To do copy/paste */ 562 .close_win = vas_deallocate_window, /* Close window */ 563 }; 564 565 /* 566 * Supporting only nx-gzip coprocessor type now, but this API code 567 * extended to other coprocessor types later. 568 */ 569 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 570 const char *name) 571 { 572 if (!copypaste_feat) 573 return -ENOTSUPP; 574 575 return vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 576 } 577 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 578 579 void vas_unregister_api_pseries(void) 580 { 581 vas_unregister_coproc_api(); 582 } 583 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 584 585 /* 586 * Get the specific capabilities based on the feature type. 587 * Right now supports GZIP default and GZIP QoS capabilities. 588 */ 589 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 590 struct hv_vas_cop_feat_caps *hv_caps) 591 { 592 struct vas_cop_feat_caps *caps; 593 struct vas_caps *vcaps; 594 int rc = 0; 595 596 vcaps = &vascaps[type]; 597 memset(vcaps, 0, sizeof(*vcaps)); 598 INIT_LIST_HEAD(&vcaps->list); 599 600 vcaps->feat = feat; 601 caps = &vcaps->caps; 602 603 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 604 (u64)virt_to_phys(hv_caps)); 605 if (rc) 606 return rc; 607 608 caps->user_mode = hv_caps->user_mode; 609 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 610 pr_err("User space COPY/PASTE is not supported\n"); 611 return -ENOTSUPP; 612 } 613 614 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 615 caps->win_type = hv_caps->win_type; 616 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 617 pr_err("Unsupported window type %u\n", caps->win_type); 618 return -EINVAL; 619 } 620 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 621 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 622 atomic_set(&caps->nr_total_credits, 623 be16_to_cpu(hv_caps->target_lpar_creds)); 624 if (feat == VAS_GZIP_DEF_FEAT) { 625 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 626 627 if (caps->max_win_creds < DEF_WIN_CREDS) { 628 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 629 DEF_WIN_CREDS, caps->max_win_creds); 630 return -EINVAL; 631 } 632 } 633 634 rc = sysfs_add_vas_caps(caps); 635 if (rc) 636 return rc; 637 638 copypaste_feat = true; 639 640 return 0; 641 } 642 643 /* 644 * VAS windows can be closed due to lost credits when the core is 645 * removed. So reopen them if credits are available due to DLPAR 646 * core add and set the window active status. When NX sees the page 647 * fault on the unmapped paste address, the kernel handles the fault 648 * by setting the remapping to new paste address if the window is 649 * active. 650 */ 651 static int reconfig_open_windows(struct vas_caps *vcaps, int creds, 652 bool migrate) 653 { 654 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 655 struct vas_cop_feat_caps *caps = &vcaps->caps; 656 struct pseries_vas_window *win = NULL, *tmp; 657 int rc, mv_ents = 0; 658 int flag; 659 660 /* 661 * Nothing to do if there are no closed windows. 662 */ 663 if (!vcaps->nr_close_wins) 664 return 0; 665 666 /* 667 * For the core removal, the hypervisor reduces the credits 668 * assigned to the LPAR and the kernel closes VAS windows 669 * in the hypervisor depends on reduced credits. The kernel 670 * uses LIFO (the last windows that are opened will be closed 671 * first) and expects to open in the same order when credits 672 * are available. 673 * For example, 40 windows are closed when the LPAR lost 2 cores 674 * (dedicated). If 1 core is added, this LPAR can have 20 more 675 * credits. It means the kernel can reopen 20 windows. So move 676 * 20 entries in the VAS windows lost and reopen next 20 windows. 677 * For partition migration, reopen all windows that are closed 678 * during resume. 679 */ 680 if ((vcaps->nr_close_wins > creds) && !migrate) 681 mv_ents = vcaps->nr_close_wins - creds; 682 683 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { 684 if (!mv_ents) 685 break; 686 687 mv_ents--; 688 } 689 690 /* 691 * Open windows if they are closed only with migration or 692 * DLPAR (lost credit) before. 693 */ 694 if (migrate) 695 flag = VAS_WIN_MIGRATE_CLOSE; 696 else 697 flag = VAS_WIN_NO_CRED_CLOSE; 698 699 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { 700 /* 701 * This window is closed with DLPAR and migration events. 702 * So reopen the window with the last event. 703 * The user space is not suspended with the current 704 * migration notifier. So the user space can issue DLPAR 705 * CPU hotplug while migration in progress. In this case 706 * this window will be opened with the last event. 707 */ 708 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 709 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 710 win->vas_win.status &= ~flag; 711 continue; 712 } 713 714 /* 715 * Nothing to do on this window if it is not closed 716 * with this flag 717 */ 718 if (!(win->vas_win.status & flag)) 719 continue; 720 721 rc = allocate_setup_window(win, (u64 *)&domain[0], 722 caps->win_type); 723 if (rc) 724 return rc; 725 726 rc = h_modify_vas_window(win); 727 if (rc) 728 goto out; 729 730 mutex_lock(&win->vas_win.task_ref.mmap_mutex); 731 /* 732 * Set window status to active 733 */ 734 win->vas_win.status &= ~flag; 735 mutex_unlock(&win->vas_win.task_ref.mmap_mutex); 736 win->win_type = caps->win_type; 737 if (!--vcaps->nr_close_wins) 738 break; 739 } 740 741 return 0; 742 out: 743 /* 744 * Window modify HCALL failed. So close the window to the 745 * hypervisor and return. 746 */ 747 free_irq_setup(win); 748 h_deallocate_vas_window(win->vas_win.winid); 749 return rc; 750 } 751 752 /* 753 * The hypervisor reduces the available credits if the LPAR lost core. It 754 * means the excessive windows should not be active and the user space 755 * should not be using these windows to send compression requests to NX. 756 * So the kernel closes the excessive windows and unmap the paste address 757 * such that the user space receives paste instruction failure. Then up to 758 * the user space to fall back to SW compression and manage with the 759 * existing windows. 760 */ 761 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, 762 bool migrate) 763 { 764 struct pseries_vas_window *win, *tmp; 765 struct vas_user_win_ref *task_ref; 766 struct vm_area_struct *vma; 767 int rc = 0, flag; 768 769 if (migrate) 770 flag = VAS_WIN_MIGRATE_CLOSE; 771 else 772 flag = VAS_WIN_NO_CRED_CLOSE; 773 774 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { 775 /* 776 * This window is already closed due to lost credit 777 * or for migration before. Go for next window. 778 * For migration, nothing to do since this window 779 * closed for DLPAR and will be reopened even on 780 * the destination system with other DLPAR operation. 781 */ 782 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || 783 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { 784 win->vas_win.status |= flag; 785 continue; 786 } 787 788 task_ref = &win->vas_win.task_ref; 789 /* 790 * VAS mmap (coproc_mmap()) and its fault handler 791 * (vas_mmap_fault()) are called after holding mmap lock. 792 * So hold mmap mutex after mmap_lock to avoid deadlock. 793 */ 794 mmap_write_lock(task_ref->mm); 795 mutex_lock(&task_ref->mmap_mutex); 796 vma = task_ref->vma; 797 /* 798 * Number of available credits are reduced, So select 799 * and close windows. 800 */ 801 win->vas_win.status |= flag; 802 803 /* 804 * vma is set in the original mapping. But this mapping 805 * is done with mmap() after the window is opened with ioctl. 806 * so we may not see the original mapping if the core remove 807 * is done before the original mmap() and after the ioctl. 808 */ 809 if (vma) 810 zap_vma_pages(vma); 811 812 mutex_unlock(&task_ref->mmap_mutex); 813 mmap_write_unlock(task_ref->mm); 814 /* 815 * Close VAS window in the hypervisor, but do not 816 * free vas_window struct since it may be reused 817 * when the credit is available later (DLPAR with 818 * adding cores). This struct will be used 819 * later when the process issued with close(FD). 820 */ 821 rc = deallocate_free_window(win); 822 /* 823 * This failure is from the hypervisor. 824 * No way to stop migration for these failures. 825 * So ignore error and continue closing other windows. 826 */ 827 if (rc && !migrate) 828 return rc; 829 830 vcap->nr_close_wins++; 831 832 /* 833 * For migration, do not depend on lpar_creds in case if 834 * mismatch with the hypervisor value (should not happen). 835 * So close all active windows in the list and will be 836 * reopened windows based on the new lpar_creds on the 837 * destination system during resume. 838 */ 839 if (!migrate && !--excess_creds) 840 break; 841 } 842 843 return 0; 844 } 845 846 /* 847 * Get new VAS capabilities when the core add/removal configuration 848 * changes. Reconfig window configurations based on the credits 849 * availability from this new capabilities. 850 */ 851 int vas_reconfig_capabilties(u8 type, int new_nr_creds) 852 { 853 struct vas_cop_feat_caps *caps; 854 int old_nr_creds; 855 struct vas_caps *vcaps; 856 int rc = 0, nr_active_wins; 857 858 if (type >= VAS_MAX_FEAT_TYPE) { 859 pr_err("Invalid credit type %d\n", type); 860 return -EINVAL; 861 } 862 863 vcaps = &vascaps[type]; 864 caps = &vcaps->caps; 865 866 mutex_lock(&vas_pseries_mutex); 867 868 old_nr_creds = atomic_read(&caps->nr_total_credits); 869 870 atomic_set(&caps->nr_total_credits, new_nr_creds); 871 /* 872 * The total number of available credits may be decreased or 873 * increased with DLPAR operation. Means some windows have to be 874 * closed / reopened. Hold the vas_pseries_mutex so that the 875 * user space can not open new windows. 876 */ 877 if (old_nr_creds < new_nr_creds) { 878 /* 879 * If the existing target credits is less than the new 880 * target, reopen windows if they are closed due to 881 * the previous DLPAR (core removal). 882 */ 883 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, 884 false); 885 } else { 886 /* 887 * # active windows is more than new LPAR available 888 * credits. So close the excessive windows. 889 * On pseries, each window will have 1 credit. 890 */ 891 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; 892 if (nr_active_wins > new_nr_creds) 893 rc = reconfig_close_windows(vcaps, 894 nr_active_wins - new_nr_creds, 895 false); 896 } 897 898 mutex_unlock(&vas_pseries_mutex); 899 return rc; 900 } 901 902 int pseries_vas_dlpar_cpu(void) 903 { 904 int new_nr_creds, rc; 905 906 /* 907 * NX-GZIP is not enabled. Nothing to do for DLPAR event 908 */ 909 if (!copypaste_feat) 910 return 0; 911 912 913 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 914 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, 915 (u64)virt_to_phys(&hv_cop_caps)); 916 if (!rc) { 917 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 918 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds); 919 } 920 921 if (rc) 922 pr_err("Failed reconfig VAS capabilities with DLPAR\n"); 923 924 return rc; 925 } 926 927 /* 928 * Total number of default credits available (target_credits) 929 * in LPAR depends on number of cores configured. It varies based on 930 * whether processors are in shared mode or dedicated mode. 931 * Get the notifier when CPU configuration is changed with DLPAR 932 * operation so that get the new target_credits (vas default capabilities) 933 * and then update the existing windows usage if needed. 934 */ 935 static int pseries_vas_notifier(struct notifier_block *nb, 936 unsigned long action, void *data) 937 { 938 struct of_reconfig_data *rd = data; 939 struct device_node *dn = rd->dn; 940 const __be32 *intserv = NULL; 941 int len; 942 943 /* 944 * For shared CPU partition, the hypervisor assigns total credits 945 * based on entitled core capacity. So updating VAS windows will 946 * be called from lparcfg_write(). 947 */ 948 if (is_shared_processor()) 949 return NOTIFY_OK; 950 951 if ((action == OF_RECONFIG_ATTACH_NODE) || 952 (action == OF_RECONFIG_DETACH_NODE)) 953 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", 954 &len); 955 /* 956 * Processor config is not changed 957 */ 958 if (!intserv) 959 return NOTIFY_OK; 960 961 return pseries_vas_dlpar_cpu(); 962 } 963 964 static struct notifier_block pseries_vas_nb = { 965 .notifier_call = pseries_vas_notifier, 966 }; 967 968 /* 969 * For LPM, all windows have to be closed on the source partition 970 * before migration and reopen them on the destination partition 971 * after migration. So closing windows during suspend and 972 * reopen them during resume. 973 */ 974 int vas_migration_handler(int action) 975 { 976 struct vas_cop_feat_caps *caps; 977 int old_nr_creds, new_nr_creds = 0; 978 struct vas_caps *vcaps; 979 int i, rc = 0; 980 981 pr_info("VAS migration event %d\n", action); 982 983 /* 984 * NX-GZIP is not enabled. Nothing to do for migration. 985 */ 986 if (!copypaste_feat) 987 return rc; 988 989 if (action == VAS_SUSPEND) 990 migration_in_progress = true; 991 else 992 migration_in_progress = false; 993 994 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { 995 vcaps = &vascaps[i]; 996 caps = &vcaps->caps; 997 old_nr_creds = atomic_read(&caps->nr_total_credits); 998 999 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 1000 vcaps->feat, 1001 (u64)virt_to_phys(&hv_cop_caps)); 1002 if (!rc) { 1003 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 1004 /* 1005 * Should not happen. But incase print messages, close 1006 * all windows in the list during suspend and reopen 1007 * windows based on new lpar_creds on the destination 1008 * system. 1009 */ 1010 if (old_nr_creds != new_nr_creds) { 1011 pr_err("Target credits mismatch with the hypervisor\n"); 1012 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", 1013 action, old_nr_creds, new_nr_creds); 1014 pr_err("Used creds: %d, Active creds: %d\n", 1015 atomic_read(&caps->nr_used_credits), 1016 vcaps->nr_open_windows - vcaps->nr_close_wins); 1017 } 1018 } else { 1019 pr_err("state(%d): Get VAS capabilities failed with %d\n", 1020 action, rc); 1021 /* 1022 * We can not stop migration with the current lpm 1023 * implementation. So continue closing all windows in 1024 * the list (during suspend) and return without 1025 * opening windows (during resume) if VAS capabilities 1026 * HCALL failed. 1027 */ 1028 if (action == VAS_RESUME) 1029 goto out; 1030 } 1031 1032 switch (action) { 1033 case VAS_SUSPEND: 1034 mutex_lock(&vas_pseries_mutex); 1035 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, 1036 true); 1037 /* 1038 * Windows are included in the list after successful 1039 * open. So wait for closing these in-progress open 1040 * windows in vas_allocate_window() which will be 1041 * done if the migration_in_progress is set. 1042 */ 1043 while (vcaps->nr_open_wins_progress) { 1044 mutex_unlock(&vas_pseries_mutex); 1045 msleep(10); 1046 mutex_lock(&vas_pseries_mutex); 1047 } 1048 mutex_unlock(&vas_pseries_mutex); 1049 break; 1050 case VAS_RESUME: 1051 mutex_lock(&vas_pseries_mutex); 1052 atomic_set(&caps->nr_total_credits, new_nr_creds); 1053 rc = reconfig_open_windows(vcaps, new_nr_creds, true); 1054 mutex_unlock(&vas_pseries_mutex); 1055 break; 1056 default: 1057 /* should not happen */ 1058 pr_err("Invalid migration action %d\n", action); 1059 rc = -EINVAL; 1060 goto out; 1061 } 1062 1063 /* 1064 * Ignore errors during suspend and return for resume. 1065 */ 1066 if (rc && (action == VAS_RESUME)) 1067 goto out; 1068 } 1069 1070 pr_info("VAS migration event (%d) successful\n", action); 1071 1072 out: 1073 return rc; 1074 } 1075 1076 static int __init pseries_vas_init(void) 1077 { 1078 struct hv_vas_all_caps *hv_caps; 1079 int rc = 0; 1080 1081 /* 1082 * Linux supports user space COPY/PASTE only with Radix 1083 */ 1084 if (!radix_enabled()) { 1085 copypaste_feat = false; 1086 pr_err("API is supported only with radix page tables\n"); 1087 return -ENOTSUPP; 1088 } 1089 1090 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 1091 if (!hv_caps) 1092 return -ENOMEM; 1093 /* 1094 * Get VAS overall capabilities by passing 0 to feature type. 1095 */ 1096 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 1097 (u64)virt_to_phys(hv_caps)); 1098 if (rc) 1099 goto out; 1100 1101 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 1102 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 1103 1104 sysfs_pseries_vas_init(&caps_all); 1105 1106 /* 1107 * QOS capabilities available 1108 */ 1109 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 1110 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 1111 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); 1112 1113 if (rc) 1114 goto out; 1115 } 1116 /* 1117 * Default capabilities available 1118 */ 1119 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) 1120 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 1121 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); 1122 1123 if (!rc && copypaste_feat) { 1124 if (firmware_has_feature(FW_FEATURE_LPAR)) 1125 of_reconfig_notifier_register(&pseries_vas_nb); 1126 1127 pr_info("GZIP feature is available\n"); 1128 } else { 1129 /* 1130 * Should not happen, but only when get default 1131 * capabilities HCALL failed. So disable copy paste 1132 * feature. 1133 */ 1134 copypaste_feat = false; 1135 } 1136 1137 out: 1138 kfree(hv_caps); 1139 return rc; 1140 } 1141 machine_device_initcall(pseries, pseries_vas_init); 1142