xref: /linux/arch/powerpc/platforms/pseries/vas.c (revision 001821b0e79716c4e17c71d8e053a23599a7a508)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright 2020-21 IBM Corp.
4  */
5 
6 #define pr_fmt(fmt) "vas: " fmt
7 
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/types.h>
12 #include <linux/delay.h>
13 #include <linux/slab.h>
14 #include <linux/interrupt.h>
15 #include <linux/irqdomain.h>
16 #include <asm/machdep.h>
17 #include <asm/hvcall.h>
18 #include <asm/plpar_wrappers.h>
19 #include <asm/firmware.h>
20 #include <asm/vphn.h>
21 #include <asm/vas.h>
22 #include "vas.h"
23 
/*
 * Paste address value that h_allocate_vas_window() treats as
 * "COPY/PASTE is not supported".
 */
#define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
/*
 * Initial value of the first domain entry passed to the allocate window
 * hcall; with this value the hypervisor selects the VAS instance.
 */
#define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
/* The hypervisor allows one credit per window right now */
#define DEF_WIN_CREDS		1

/* Overall VAS capabilities, filled in by pseries_vas_init() */
static struct vas_all_caps caps_all;
/*
 * Set once the capabilities of at least one feature were retrieved and
 * registered; gates API registration, DLPAR and migration handling.
 */
static bool copypaste_feat;
/*
 * Buffer whose physical address is handed to the hypervisor for the
 * per-feature capabilities queries.
 */
static struct hv_vas_cop_feat_caps hv_cop_caps;

/* Per feature type state: capabilities, window list and counters */
static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
/*
 * Serializes window open / close, the per-feature window lists and
 * counters, and the DLPAR / migration reconfiguration paths.
 */
static DEFINE_MUTEX(vas_pseries_mutex);
/*
 * True between the migration SUSPEND and RESUME events (see
 * vas_migration_handler()); no new window is handed out meanwhile.
 */
static bool migration_in_progress;
36 
37 static long hcall_return_busy_check(long rc)
38 {
39 	/* Check if we are stalled for some time */
40 	if (H_IS_LONG_BUSY(rc)) {
41 		msleep(get_longbusy_msecs(rc));
42 		rc = H_BUSY;
43 	} else if (rc == H_BUSY) {
44 		cond_resched();
45 	}
46 
47 	return rc;
48 }
49 
50 /*
51  * Allocate VAS window hcall
52  */
53 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
54 				     u8 wintype, u16 credits)
55 {
56 	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
57 	long rc;
58 
59 	do {
60 		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
61 				  credits, domain[0], domain[1], domain[2],
62 				  domain[3], domain[4], domain[5]);
63 
64 		rc = hcall_return_busy_check(rc);
65 	} while (rc == H_BUSY);
66 
67 	if (rc == H_SUCCESS) {
68 		if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
69 			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
70 			return -ENOTSUPP;
71 		}
72 		win->vas_win.winid = retbuf[0];
73 		win->win_addr = retbuf[1];
74 		win->complete_irq = retbuf[2];
75 		win->fault_irq = retbuf[3];
76 		return 0;
77 	}
78 
79 	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
80 		rc, wintype, credits);
81 
82 	return -EIO;
83 }
84 
85 /*
86  * Deallocate VAS window hcall.
87  */
88 static int h_deallocate_vas_window(u64 winid)
89 {
90 	long rc;
91 
92 	do {
93 		rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
94 
95 		rc = hcall_return_busy_check(rc);
96 	} while (rc == H_BUSY);
97 
98 	if (rc == H_SUCCESS)
99 		return 0;
100 
101 	pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
102 		rc, winid);
103 	return -EIO;
104 }
105 
106 /*
107  * Modify VAS window.
108  * After the window is opened with allocate window hcall, configure it
109  * with flags and LPAR PID before using.
110  */
111 static int h_modify_vas_window(struct pseries_vas_window *win)
112 {
113 	long rc;
114 
115 	/*
116 	 * AMR value is not supported in Linux VAS implementation.
117 	 * The hypervisor ignores it if 0 is passed.
118 	 */
119 	do {
120 		rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
121 					win->vas_win.winid, win->pid, 0,
122 					VAS_MOD_WIN_FLAGS, 0);
123 
124 		rc = hcall_return_busy_check(rc);
125 	} while (rc == H_BUSY);
126 
127 	if (rc == H_SUCCESS)
128 		return 0;
129 
130 	pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
131 			rc, win->vas_win.winid, win->pid);
132 	return -EIO;
133 }
134 
135 /*
136  * This hcall is used to determine the capabilities from the hypervisor.
137  * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
138  * @query_type: If 0 is passed, the hypervisor returns the overall
139  *		capabilities which provides all feature(s) that are
140  *		available. Then query the hypervisor to get the
141  *		corresponding capabilities for the specific feature.
142  *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
143  *			and VAS GZIP Default capabilities.
144  *			H_QUERY_NX_CAPABILITIES provides NX GZIP
145  *			capabilities.
146  * @result: Return buffer to save capabilities.
147  */
148 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
149 {
150 	long rc;
151 
152 	rc = plpar_hcall_norets(hcall, query_type, result);
153 
154 	if (rc == H_SUCCESS)
155 		return 0;
156 
157 	/* H_FUNCTION means HV does not support VAS so don't print an error */
158 	if (rc != H_FUNCTION) {
159 		pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
160 			(hcall == H_QUERY_VAS_CAPABILITIES) ?
161 				"H_QUERY_VAS_CAPABILITIES" :
162 				"H_QUERY_NX_CAPABILITIES",
163 			rc, query_type, result);
164 	}
165 
166 	return -EIO;
167 }
168 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
169 
170 /*
171  * hcall to get fault CRB from the hypervisor.
172  */
173 static int h_get_nx_fault(u32 winid, u64 buffer)
174 {
175 	long rc;
176 
177 	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
178 
179 	if (rc == H_SUCCESS)
180 		return 0;
181 
182 	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
183 		rc, winid, buffer);
184 	return -EIO;
185 
186 }
187 
188 /*
189  * Handle the fault interrupt.
190  * When the fault interrupt is received for each window, query the
191  * hypervisor to get the fault CRB on the specific fault. Then
192  * process the CRB by updating CSB or send signal if the user space
193  * CSB is invalid.
194  * Note: The hypervisor forwards an interrupt for each fault request.
195  *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
196  */
197 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
198 {
199 	struct pseries_vas_window *txwin = data;
200 	struct coprocessor_request_block crb;
201 	struct vas_user_win_ref *tsk_ref;
202 	int rc;
203 
204 	while (atomic_read(&txwin->pending_faults)) {
205 		rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
206 		if (!rc) {
207 			tsk_ref = &txwin->vas_win.task_ref;
208 			vas_dump_crb(&crb);
209 			vas_update_csb(&crb, tsk_ref);
210 		}
211 		atomic_dec(&txwin->pending_faults);
212 	}
213 
214 	return IRQ_HANDLED;
215 }
216 
217 /*
218  * irq_default_primary_handler() can be used only with IRQF_ONESHOT
219  * which disables IRQ before executing the thread handler and enables
220  * it after. But this disabling interrupt sets the VAS IRQ OFF
221  * state in the hypervisor. If the NX generates fault interrupt
222  * during this window, the hypervisor will not deliver this
223  * interrupt to the LPAR. So use VAS specific IRQ handler instead
224  * of calling the default primary handler.
225  */
226 static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
227 {
228 	struct pseries_vas_window *txwin = data;
229 
230 	/*
231 	 * The thread handler will process this interrupt if it is
232 	 * already running.
233 	 */
234 	atomic_inc(&txwin->pending_faults);
235 
236 	return IRQ_WAKE_THREAD;
237 }
238 
239 /*
240  * Allocate window and setup IRQ mapping.
241  */
242 static int allocate_setup_window(struct pseries_vas_window *txwin,
243 				 u64 *domain, u8 wintype)
244 {
245 	int rc;
246 
247 	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
248 	if (rc)
249 		return rc;
250 	/*
251 	 * On PowerVM, the hypervisor setup and forwards the fault
252 	 * interrupt per window. So the IRQ setup and fault handling
253 	 * will be done for each open window separately.
254 	 */
255 	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
256 	if (!txwin->fault_virq) {
257 		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
258 		rc = -EINVAL;
259 		goto out_win;
260 	}
261 
262 	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
263 				txwin->vas_win.winid);
264 	if (!txwin->name) {
265 		rc = -ENOMEM;
266 		goto out_irq;
267 	}
268 
269 	rc = request_threaded_irq(txwin->fault_virq,
270 				  pseries_vas_irq_handler,
271 				  pseries_vas_fault_thread_fn, 0,
272 				  txwin->name, txwin);
273 	if (rc) {
274 		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
275 		       txwin->vas_win.winid, txwin->fault_virq, rc);
276 		goto out_free;
277 	}
278 
279 	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
280 
281 	return 0;
282 out_free:
283 	kfree(txwin->name);
284 out_irq:
285 	irq_dispose_mapping(txwin->fault_virq);
286 out_win:
287 	h_deallocate_vas_window(txwin->vas_win.winid);
288 	return rc;
289 }
290 
291 static inline void free_irq_setup(struct pseries_vas_window *txwin)
292 {
293 	free_irq(txwin->fault_virq, txwin);
294 	kfree(txwin->name);
295 	irq_dispose_mapping(txwin->fault_virq);
296 }
297 
298 static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
299 					      enum vas_cop_type cop_type)
300 {
301 	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
302 	struct vas_cop_feat_caps *cop_feat_caps;
303 	struct vas_caps *caps;
304 	struct pseries_vas_window *txwin;
305 	int rc;
306 
307 	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
308 	if (!txwin)
309 		return ERR_PTR(-ENOMEM);
310 
311 	/*
312 	 * A VAS window can have many credits which means that many
313 	 * requests can be issued simultaneously. But the hypervisor
314 	 * restricts one credit per window.
315 	 * The hypervisor introduces 2 different types of credits:
316 	 * Default credit type (Uses normal priority FIFO):
317 	 *	A limited number of credits are assigned to partitions
318 	 *	based on processor entitlement. But these credits may be
319 	 *	over-committed on a system depends on whether the CPUs
320 	 *	are in shared or dedicated modes - that is, more requests
321 	 *	may be issued across the system than NX can service at
322 	 *	once which can result in paste command failure (RMA_busy).
323 	 *	Then the process has to resend requests or fall-back to
324 	 *	SW compression.
325 	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
326 	 *	To avoid NX HW contention, the system admins can assign
327 	 *	QoS credits for each LPAR so that this partition is
328 	 *	guaranteed access to NX resources. These credits are
329 	 *	assigned to partitions via the HMC.
330 	 *	Refer PAPR for more information.
331 	 *
332 	 * Allocate window with QoS credits if user requested. Otherwise
333 	 * default credits are used.
334 	 */
335 	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
336 		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
337 	else
338 		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
339 
340 	cop_feat_caps = &caps->caps;
341 
342 	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
343 			atomic_read(&cop_feat_caps->nr_total_credits)) {
344 		pr_err_ratelimited("Credits are not available to allocate window\n");
345 		rc = -EINVAL;
346 		goto out;
347 	}
348 
349 	if (vas_id == -1) {
350 		/*
351 		 * The user space is requesting to allocate a window on
352 		 * a VAS instance where the process is executing.
353 		 * On PowerVM, domain values are passed to the hypervisor
354 		 * to select VAS instance. Useful if the process is
355 		 * affinity to NUMA node.
356 		 * The hypervisor selects VAS instance if
357 		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
358 		 * The h_allocate_vas_window hcall is defined to take a
359 		 * domain values as specified by h_home_node_associativity,
360 		 * So no unpacking needs to be done.
361 		 */
362 		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
363 				  VPHN_FLAG_VCPU, hard_smp_processor_id());
364 		if (rc != H_SUCCESS) {
365 			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
366 			goto out;
367 		}
368 	}
369 
370 	txwin->pid = mfspr(SPRN_PID);
371 
372 	/*
373 	 * Allocate / Deallocate window hcalls and setup / free IRQs
374 	 * have to be protected with mutex.
375 	 * Open VAS window: Allocate window hcall and setup IRQ
376 	 * Close VAS window: Deallocate window hcall and free IRQ
377 	 *	The hypervisor waits until all NX requests are
378 	 *	completed before closing the window. So expects OS
379 	 *	to handle NX faults, means IRQ can be freed only
380 	 *	after the deallocate window hcall is returned.
381 	 * So once the window is closed with deallocate hcall before
382 	 * the IRQ is freed, it can be assigned to new allocate
383 	 * hcall with the same fault IRQ by the hypervisor. It can
384 	 * result in setup IRQ fail for the new window since the
385 	 * same fault IRQ is not freed by the OS before.
386 	 */
387 	mutex_lock(&vas_pseries_mutex);
388 	if (migration_in_progress) {
389 		rc = -EBUSY;
390 	} else {
391 		rc = allocate_setup_window(txwin, (u64 *)&domain[0],
392 				   cop_feat_caps->win_type);
393 		if (!rc)
394 			caps->nr_open_wins_progress++;
395 	}
396 
397 	mutex_unlock(&vas_pseries_mutex);
398 	if (rc)
399 		goto out;
400 
401 	/*
402 	 * Modify window and it is ready to use.
403 	 */
404 	rc = h_modify_vas_window(txwin);
405 	if (!rc)
406 		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
407 	if (rc)
408 		goto out_free;
409 
410 	txwin->win_type = cop_feat_caps->win_type;
411 
412 	/*
413 	 * The migration SUSPEND thread sets migration_in_progress and
414 	 * closes all open windows from the list. But the window is
415 	 * added to the list after open and modify HCALLs. So possible
416 	 * that migration_in_progress is set before modify HCALL which
417 	 * may cause some windows are still open when the hypervisor
418 	 * initiates the migration.
419 	 * So checks the migration_in_progress flag again and close all
420 	 * open windows.
421 	 *
422 	 * Possible to lose the acquired credit with DLPAR core
423 	 * removal after the window is opened. So if there are any
424 	 * closed windows (means with lost credits), do not give new
425 	 * window to user space. New windows will be opened only
426 	 * after the existing windows are reopened when credits are
427 	 * available.
428 	 */
429 	mutex_lock(&vas_pseries_mutex);
430 	if (!caps->nr_close_wins && !migration_in_progress) {
431 		list_add(&txwin->win_list, &caps->list);
432 		caps->nr_open_windows++;
433 		caps->nr_open_wins_progress--;
434 		mutex_unlock(&vas_pseries_mutex);
435 		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
436 		return &txwin->vas_win;
437 	}
438 	mutex_unlock(&vas_pseries_mutex);
439 
440 	put_vas_user_win_ref(&txwin->vas_win.task_ref);
441 	rc = -EBUSY;
442 	pr_err_ratelimited("No credit is available to allocate window\n");
443 
444 out_free:
445 	/*
446 	 * Window is not operational. Free IRQ before closing
447 	 * window so that do not have to hold mutex.
448 	 */
449 	free_irq_setup(txwin);
450 	h_deallocate_vas_window(txwin->vas_win.winid);
451 	/*
452 	 * Hold mutex and reduce nr_open_wins_progress counter.
453 	 */
454 	mutex_lock(&vas_pseries_mutex);
455 	caps->nr_open_wins_progress--;
456 	mutex_unlock(&vas_pseries_mutex);
457 out:
458 	atomic_dec(&cop_feat_caps->nr_used_credits);
459 	kfree(txwin);
460 	return ERR_PTR(rc);
461 }
462 
463 static u64 vas_paste_address(struct vas_window *vwin)
464 {
465 	struct pseries_vas_window *win;
466 
467 	win = container_of(vwin, struct pseries_vas_window, vas_win);
468 	return win->win_addr;
469 }
470 
471 static int deallocate_free_window(struct pseries_vas_window *win)
472 {
473 	int rc = 0;
474 
475 	/*
476 	 * The hypervisor waits for all requests including faults
477 	 * are processed before closing the window - Means all
478 	 * credits have to be returned. In the case of fault
479 	 * request, a credit is returned after OS issues
480 	 * H_GET_NX_FAULT hcall.
481 	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
482 	 * hcall.
483 	 */
484 	rc = h_deallocate_vas_window(win->vas_win.winid);
485 	if (!rc)
486 		free_irq_setup(win);
487 
488 	return rc;
489 }
490 
491 static int vas_deallocate_window(struct vas_window *vwin)
492 {
493 	struct pseries_vas_window *win;
494 	struct vas_cop_feat_caps *caps;
495 	int rc = 0;
496 
497 	if (!vwin)
498 		return -EINVAL;
499 
500 	win = container_of(vwin, struct pseries_vas_window, vas_win);
501 
502 	/* Should not happen */
503 	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
504 		pr_err("Window (%u): Invalid window type %u\n",
505 				vwin->winid, win->win_type);
506 		return -EINVAL;
507 	}
508 
509 	caps = &vascaps[win->win_type].caps;
510 	mutex_lock(&vas_pseries_mutex);
511 	/*
512 	 * VAS window is already closed in the hypervisor when
513 	 * lost the credit or with migration. So just remove the entry
514 	 * from the list, remove task references and free vas_window
515 	 * struct.
516 	 */
517 	if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
518 		!(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
519 		rc = deallocate_free_window(win);
520 		if (rc) {
521 			mutex_unlock(&vas_pseries_mutex);
522 			return rc;
523 		}
524 	} else
525 		vascaps[win->win_type].nr_close_wins--;
526 
527 	list_del(&win->win_list);
528 	atomic_dec(&caps->nr_used_credits);
529 	vascaps[win->win_type].nr_open_windows--;
530 	mutex_unlock(&vas_pseries_mutex);
531 
532 	mm_context_remove_vas_window(vwin->task_ref.mm);
533 	put_vas_user_win_ref(&vwin->task_ref);
534 
535 	kfree(win);
536 	return 0;
537 }
538 
/*
 * pseries implementation of the user space window operations, handed
 * to vas_register_coproc_api() by vas_register_api_pseries().
 */
static const struct vas_user_win_ops vops_pseries = {
	.open_win	= vas_allocate_window,	/* Open and configure window */
	.paste_addr	= vas_paste_address,	/* To do copy/paste */
	.close_win	= vas_deallocate_window, /* Close window */
};
544 
545 /*
546  * Supporting only nx-gzip coprocessor type now, but this API code
547  * extended to other coprocessor types later.
548  */
549 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
550 			     const char *name)
551 {
552 	if (!copypaste_feat)
553 		return -ENOTSUPP;
554 
555 	return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
556 }
557 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
558 
/*
 * Counterpart of vas_register_api_pseries(): unregister the
 * coprocessor API.
 */
void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
564 
565 /*
566  * Get the specific capabilities based on the feature type.
567  * Right now supports GZIP default and GZIP QoS capabilities.
568  */
569 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
570 				struct hv_vas_cop_feat_caps *hv_caps)
571 {
572 	struct vas_cop_feat_caps *caps;
573 	struct vas_caps *vcaps;
574 	int rc = 0;
575 
576 	vcaps = &vascaps[type];
577 	memset(vcaps, 0, sizeof(*vcaps));
578 	INIT_LIST_HEAD(&vcaps->list);
579 
580 	vcaps->feat = feat;
581 	caps = &vcaps->caps;
582 
583 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
584 					  (u64)virt_to_phys(hv_caps));
585 	if (rc)
586 		return rc;
587 
588 	caps->user_mode = hv_caps->user_mode;
589 	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
590 		pr_err("User space COPY/PASTE is not supported\n");
591 		return -ENOTSUPP;
592 	}
593 
594 	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
595 	caps->win_type = hv_caps->win_type;
596 	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
597 		pr_err("Unsupported window type %u\n", caps->win_type);
598 		return -EINVAL;
599 	}
600 	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
601 	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
602 	atomic_set(&caps->nr_total_credits,
603 		   be16_to_cpu(hv_caps->target_lpar_creds));
604 	if (feat == VAS_GZIP_DEF_FEAT) {
605 		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
606 
607 		if (caps->max_win_creds < DEF_WIN_CREDS) {
608 			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
609 			       DEF_WIN_CREDS, caps->max_win_creds);
610 			return -EINVAL;
611 		}
612 	}
613 
614 	rc = sysfs_add_vas_caps(caps);
615 	if (rc)
616 		return rc;
617 
618 	copypaste_feat = true;
619 
620 	return 0;
621 }
622 
/*
 * VAS windows can be closed due to lost credits when the core is
 * removed. So reopen them if credits are available due to DLPAR
 * core add and set the window active status. When NX sees the page
 * fault on the unmapped paste address, the kernel handles the fault
 * by setting the remapping to new paste address if the window is
 * active.
 *
 * @vcaps: per-feature state whose window list is walked
 * @creds: number of windows that may be reopened (ignored for migration)
 * @migrate: true when called for migration resume, false for DLPAR
 *
 * All callers in this file hold vas_pseries_mutex.
 *
 * Returns 0 on success (including when there is nothing to reopen) or
 * the error of the first window that could not be reopened; windows
 * later in the list are not attempted in that case.
 */
static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
				 bool migrate)
{
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *caps = &vcaps->caps;
	struct pseries_vas_window *win = NULL, *tmp;
	int rc, mv_ents = 0;
	int flag;

	/*
	 * Nothing to do if there are no closed windows.
	 */
	if (!vcaps->nr_close_wins)
		return 0;

	/*
	 * For the core removal, the hypervisor reduces the credits
	 * assigned to the LPAR and the kernel closes VAS windows
	 * in the hypervisor depending on the reduced credits. The kernel
	 * uses LIFO (the last windows that are opened will be closed
	 * first) and expects to open in the same order when credits
	 * are available.
	 * For example, 40 windows are closed when the LPAR lost 2 cores
	 * (dedicated). If 1 core is added, this LPAR can have 20 more
	 * credits. It means the kernel can reopen 20 windows. So skip
	 * the first 20 entries in the VAS window list and reopen the
	 * next 20 windows.
	 * For partition migration, reopen all windows that are closed
	 * during resume.
	 */
	if ((vcaps->nr_close_wins > creds) && !migrate)
		mv_ents = vcaps->nr_close_wins - creds;

	/*
	 * Skip the first mv_ents entries. On exit @win is the entry at
	 * which the reopen loop below starts.
	 */
	list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
		if (!mv_ents)
			break;

		mv_ents--;
	}

	/*
	 * Open windows if they are closed only with migration or
	 * DLPAR (lost credit) before.
	 */
	if (migrate)
		flag = VAS_WIN_MIGRATE_CLOSE;
	else
		flag = VAS_WIN_NO_CRED_CLOSE;

	list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
		/*
		 * This window is closed with DLPAR and migration events.
		 * So reopen the window with the last event.
		 * The user space is not suspended with the current
		 * migration notifier. So the user space can issue DLPAR
		 * CPU hotplug while migration in progress. In this case
		 * this window will be opened with the last event.
		 */
		if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
			(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
			/* Only drop this event's flag; stays closed */
			win->vas_win.status &= ~flag;
			continue;
		}

		/*
		 * Nothing to do on this window if it is not closed
		 * with this flag
		 */
		if (!(win->vas_win.status & flag))
			continue;

		/* allocate_setup_window() cleans up after itself on error */
		rc = allocate_setup_window(win, (u64 *)&domain[0],
					   caps->win_type);
		if (rc)
			return rc;

		rc = h_modify_vas_window(win);
		if (rc)
			goto out;

		mutex_lock(&win->vas_win.task_ref.mmap_mutex);
		/*
		 * Set window status to active
		 */
		win->vas_win.status &= ~flag;
		mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
		win->win_type = caps->win_type;
		/* Stop as soon as no closed window is left */
		if (!--vcaps->nr_close_wins)
			break;
	}

	return 0;
out:
	/*
	 * Window modify HCALL failed. So close the window to the
	 * hypervisor and return.
	 */
	free_irq_setup(win);
	h_deallocate_vas_window(win->vas_win.winid);
	return rc;
}
731 
/*
 * The hypervisor reduces the available credits if the LPAR lost core. It
 * means the excessive windows should not be active and the user space
 * should not be using these windows to send compression requests to NX.
 * So the kernel closes the excessive windows and unmaps the paste address
 * such that the user space receives paste instruction failure. Then it is
 * up to the user space to fall back to SW compression and manage with the
 * existing windows.
 *
 * @vcap: per-feature state whose window list is walked
 * @excess_creds: number of windows to close (not used as a limit for
 *		migration, where every active window is closed)
 * @migrate: true when called for migration suspend, false for DLPAR
 *
 * All callers in this file hold vas_pseries_mutex.
 *
 * Returns 0, or for DLPAR the error of the first window that could not
 * be closed in the hypervisor. Close errors are ignored for migration.
 */
static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
									bool migrate)
{
	struct pseries_vas_window *win, *tmp;
	struct vas_user_win_ref *task_ref;
	struct vm_area_struct *vma;
	int rc = 0, flag;

	if (migrate)
		flag = VAS_WIN_MIGRATE_CLOSE;
	else
		flag = VAS_WIN_NO_CRED_CLOSE;

	list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
		/*
		 * This window is already closed due to lost credit
		 * or for migration before. Go for next window.
		 * For migration, nothing to do since this window
		 * closed for DLPAR and will be reopened even on
		 * the destination system with other DLPAR operation.
		 */
		if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
			(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
			/* Remember that this event closed it as well */
			win->vas_win.status |= flag;
			continue;
		}

		task_ref = &win->vas_win.task_ref;
		/*
		 * VAS mmap (coproc_mmap()) and its fault handler
		 * (vas_mmap_fault()) are called after holding mmap lock.
		 * So hold mmap mutex after mmap_lock to avoid deadlock.
		 */
		mmap_write_lock(task_ref->mm);
		mutex_lock(&task_ref->mmap_mutex);
		vma = task_ref->vma;
		/*
		 * Number of available credits are reduced, So select
		 * and close windows.
		 */
		win->vas_win.status |= flag;

		/*
		 * vma is set in the original mapping. But this mapping
		 * is done with mmap() after the window is opened with ioctl.
		 * so we may not see the original mapping if the core remove
		 * is done before the original mmap() and after the ioctl.
		 */
		if (vma)
			zap_vma_pages(vma);

		mutex_unlock(&task_ref->mmap_mutex);
		mmap_write_unlock(task_ref->mm);
		/*
		 * Close VAS window in the hypervisor, but do not
		 * free vas_window struct since it may be reused
		 * when the credit is available later (DLPAR with
		 * adding cores). This struct will be used
		 * later when the process issued with close(FD).
		 */
		rc = deallocate_free_window(win);
		/*
		 * This failure is from the hypervisor.
		 * No way to stop migration for these failures.
		 * So ignore error and continue closing other windows.
		 */
		if (rc && !migrate)
			return rc;

		vcap->nr_close_wins++;

		/*
		 * For migration, do not depend on lpar_creds in case if
		 * mismatch with the hypervisor value (should not happen).
		 * So close all active windows in the list and will be
		 * reopened windows based on the new lpar_creds on the
		 * destination system during resume.
		 */
		if (!migrate && !--excess_creds)
			break;
	}

	return 0;
}
825 
826 /*
827  * Get new VAS capabilities when the core add/removal configuration
828  * changes. Reconfig window configurations based on the credits
829  * availability from this new capabilities.
830  */
831 int vas_reconfig_capabilties(u8 type, int new_nr_creds)
832 {
833 	struct vas_cop_feat_caps *caps;
834 	int old_nr_creds;
835 	struct vas_caps *vcaps;
836 	int rc = 0, nr_active_wins;
837 
838 	if (type >= VAS_MAX_FEAT_TYPE) {
839 		pr_err("Invalid credit type %d\n", type);
840 		return -EINVAL;
841 	}
842 
843 	vcaps = &vascaps[type];
844 	caps = &vcaps->caps;
845 
846 	mutex_lock(&vas_pseries_mutex);
847 
848 	old_nr_creds = atomic_read(&caps->nr_total_credits);
849 
850 	atomic_set(&caps->nr_total_credits, new_nr_creds);
851 	/*
852 	 * The total number of available credits may be decreased or
853 	 * increased with DLPAR operation. Means some windows have to be
854 	 * closed / reopened. Hold the vas_pseries_mutex so that the
855 	 * user space can not open new windows.
856 	 */
857 	if (old_nr_creds <  new_nr_creds) {
858 		/*
859 		 * If the existing target credits is less than the new
860 		 * target, reopen windows if they are closed due to
861 		 * the previous DLPAR (core removal).
862 		 */
863 		rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
864 					   false);
865 	} else {
866 		/*
867 		 * # active windows is more than new LPAR available
868 		 * credits. So close the excessive windows.
869 		 * On pseries, each window will have 1 credit.
870 		 */
871 		nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
872 		if (nr_active_wins > new_nr_creds)
873 			rc = reconfig_close_windows(vcaps,
874 					nr_active_wins - new_nr_creds,
875 					false);
876 	}
877 
878 	mutex_unlock(&vas_pseries_mutex);
879 	return rc;
880 }
881 
882 int pseries_vas_dlpar_cpu(void)
883 {
884 	int new_nr_creds, rc;
885 
886 	/*
887 	 * NX-GZIP is not enabled. Nothing to do for DLPAR event
888 	 */
889 	if (!copypaste_feat)
890 		return 0;
891 
892 
893 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
894 				      vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
895 				      (u64)virt_to_phys(&hv_cop_caps));
896 	if (!rc) {
897 		new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
898 		rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
899 	}
900 
901 	if (rc)
902 		pr_err("Failed reconfig VAS capabilities with DLPAR\n");
903 
904 	return rc;
905 }
906 
907 /*
908  * Total number of default credits available (target_credits)
909  * in LPAR depends on number of cores configured. It varies based on
910  * whether processors are in shared mode or dedicated mode.
911  * Get the notifier when CPU configuration is changed with DLPAR
912  * operation so that get the new target_credits (vas default capabilities)
913  * and then update the existing windows usage if needed.
914  */
915 static int pseries_vas_notifier(struct notifier_block *nb,
916 				unsigned long action, void *data)
917 {
918 	struct of_reconfig_data *rd = data;
919 	struct device_node *dn = rd->dn;
920 	const __be32 *intserv = NULL;
921 	int len;
922 
923 	/*
924 	 * For shared CPU partition, the hypervisor assigns total credits
925 	 * based on entitled core capacity. So updating VAS windows will
926 	 * be called from lparcfg_write().
927 	 */
928 	if (is_shared_processor())
929 		return NOTIFY_OK;
930 
931 	if ((action == OF_RECONFIG_ATTACH_NODE) ||
932 		(action == OF_RECONFIG_DETACH_NODE))
933 		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
934 					  &len);
935 	/*
936 	 * Processor config is not changed
937 	 */
938 	if (!intserv)
939 		return NOTIFY_OK;
940 
941 	return pseries_vas_dlpar_cpu();
942 }
943 
/*
 * Notifier block registered with of_reconfig_notifier_register() in
 * pseries_vas_init().
 */
static struct notifier_block pseries_vas_nb = {
	.notifier_call = pseries_vas_notifier,
};
947 
/*
 * For LPM, all windows have to be closed on the source partition
 * before migration and reopened on the destination partition
 * after migration. So windows are closed during suspend and
 * reopened during resume.
 *
 * @action: VAS_SUSPEND or VAS_RESUME
 *
 * Returns 0 if the copy/paste feature is not enabled or on success.
 * Returns -EINVAL for an unknown @action, and for VAS_RESUME the error
 * of the capabilities query or of reopening the windows. For
 * VAS_SUSPEND the value returned is the result of the last operation
 * performed in the loop; errors do not stop the loop.
 */
int vas_migration_handler(int action)
{
	struct vas_cop_feat_caps *caps;
	int old_nr_creds, new_nr_creds = 0;
	struct vas_caps *vcaps;
	int i, rc = 0;

	pr_info("VAS migration event %d\n", action);

	/*
	 * NX-GZIP is not enabled. Nothing to do for migration.
	 */
	if (!copypaste_feat)
		return rc;

	/*
	 * vas_allocate_window() refuses to hand out new windows while
	 * this flag is set.
	 */
	if (action == VAS_SUSPEND)
		migration_in_progress = true;
	else
		migration_in_progress = false;

	/* Handle every feature type in turn */
	for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
		vcaps = &vascaps[i];
		caps = &vcaps->caps;
		old_nr_creds = atomic_read(&caps->nr_total_credits);

		rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
					      vcaps->feat,
					      (u64)virt_to_phys(&hv_cop_caps));
		if (!rc) {
			new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
			/*
			 * Should not happen. But in case it does, print
			 * messages, close all windows in the list during
			 * suspend and reopen windows based on new lpar_creds
			 * on the destination system.
			 */
			if (old_nr_creds != new_nr_creds) {
				pr_err("Target credits mismatch with the hypervisor\n");
				pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
					action, old_nr_creds, new_nr_creds);
				pr_err("Used creds: %d, Active creds: %d\n",
					atomic_read(&caps->nr_used_credits),
					vcaps->nr_open_windows - vcaps->nr_close_wins);
			}
		} else {
			pr_err("state(%d): Get VAS capabilities failed with %d\n",
				action, rc);
			/*
			 * We can not stop migration with the current lpm
			 * implementation. So continue closing all windows in
			 * the list (during suspend) and return without
			 * opening windows (during resume) if VAS capabilities
			 * HCALL failed.
			 */
			if (action == VAS_RESUME)
				goto out;
		}

		switch (action) {
		case VAS_SUSPEND:
			mutex_lock(&vas_pseries_mutex);
			rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
							true);
			/*
			 * Windows are included in the list after successful
			 * open. So wait for closing these in-progress open
			 * windows in vas_allocate_window() which will be
			 * done if the migration_in_progress is set.
			 */
			while (vcaps->nr_open_wins_progress) {
				/* Drop the mutex so the openers can finish */
				mutex_unlock(&vas_pseries_mutex);
				msleep(10);
				mutex_lock(&vas_pseries_mutex);
			}
			mutex_unlock(&vas_pseries_mutex);
			break;
		case VAS_RESUME:
			mutex_lock(&vas_pseries_mutex);
			atomic_set(&caps->nr_total_credits, new_nr_creds);
			rc = reconfig_open_windows(vcaps, new_nr_creds, true);
			mutex_unlock(&vas_pseries_mutex);
			break;
		default:
			/* should not happen */
			pr_err("Invalid migration action %d\n", action);
			rc = -EINVAL;
			goto out;
		}

		/*
		 * Ignore errors during suspend and return for resume.
		 */
		if (rc && (action == VAS_RESUME))
			goto out;
	}

	pr_info("VAS migration event (%d) successful\n", action);

out:
	return rc;
}
1055 
1056 static int __init pseries_vas_init(void)
1057 {
1058 	struct hv_vas_all_caps *hv_caps;
1059 	int rc = 0;
1060 
1061 	/*
1062 	 * Linux supports user space COPY/PASTE only with Radix
1063 	 */
1064 	if (!radix_enabled()) {
1065 		copypaste_feat = false;
1066 		pr_err("API is supported only with radix page tables\n");
1067 		return -ENOTSUPP;
1068 	}
1069 
1070 	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
1071 	if (!hv_caps)
1072 		return -ENOMEM;
1073 	/*
1074 	 * Get VAS overall capabilities by passing 0 to feature type.
1075 	 */
1076 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
1077 					  (u64)virt_to_phys(hv_caps));
1078 	if (rc)
1079 		goto out;
1080 
1081 	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
1082 	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
1083 
1084 	sysfs_pseries_vas_init(&caps_all);
1085 
1086 	/*
1087 	 * QOS capabilities available
1088 	 */
1089 	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
1090 		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
1091 					  VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
1092 
1093 		if (rc)
1094 			goto out;
1095 	}
1096 	/*
1097 	 * Default capabilities available
1098 	 */
1099 	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
1100 		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
1101 					  VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
1102 
1103 	if (!rc && copypaste_feat) {
1104 		if (firmware_has_feature(FW_FEATURE_LPAR))
1105 			of_reconfig_notifier_register(&pseries_vas_nb);
1106 
1107 		pr_info("GZIP feature is available\n");
1108 	} else {
1109 		/*
1110 		 * Should not happen, but only when get default
1111 		 * capabilities HCALL failed. So disable copy paste
1112 		 * feature.
1113 		 */
1114 		copypaste_feat = false;
1115 	}
1116 
1117 out:
1118 	kfree(hv_caps);
1119 	return rc;
1120 }
1121 machine_device_initcall(pseries, pseries_vas_init);
1122