xref: /linux/arch/powerpc/platforms/pseries/vas.c (revision 24bce201d79807b668bf9d9e0aca801c5c0d5f78)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright 2020-21 IBM Corp.
4  */
5 
6 #define pr_fmt(fmt) "vas: " fmt
7 
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/types.h>
12 #include <linux/delay.h>
13 #include <linux/slab.h>
14 #include <linux/interrupt.h>
15 #include <linux/irqdomain.h>
16 #include <asm/machdep.h>
17 #include <asm/hvcall.h>
18 #include <asm/plpar_wrappers.h>
19 #include <asm/vas.h>
20 #include "vas.h"
21 
22 #define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
23 #define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
24 /* The hypervisor allows one credit per window right now */
25 #define DEF_WIN_CREDS		1
26 
27 static struct vas_all_caps caps_all;
28 static bool copypaste_feat;
29 static struct hv_vas_cop_feat_caps hv_cop_caps;
30 
31 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
32 static DEFINE_MUTEX(vas_pseries_mutex);
33 static bool migration_in_progress;
34 
35 static long hcall_return_busy_check(long rc)
36 {
37 	/* Check if we are stalled for some time */
38 	if (H_IS_LONG_BUSY(rc)) {
39 		msleep(get_longbusy_msecs(rc));
40 		rc = H_BUSY;
41 	} else if (rc == H_BUSY) {
42 		cond_resched();
43 	}
44 
45 	return rc;
46 }
47 
48 /*
49  * Allocate VAS window hcall
50  */
51 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
52 				     u8 wintype, u16 credits)
53 {
54 	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
55 	long rc;
56 
57 	do {
58 		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
59 				  credits, domain[0], domain[1], domain[2],
60 				  domain[3], domain[4], domain[5]);
61 
62 		rc = hcall_return_busy_check(rc);
63 	} while (rc == H_BUSY);
64 
65 	if (rc == H_SUCCESS) {
66 		if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
67 			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
68 			return -ENOTSUPP;
69 		}
70 		win->vas_win.winid = retbuf[0];
71 		win->win_addr = retbuf[1];
72 		win->complete_irq = retbuf[2];
73 		win->fault_irq = retbuf[3];
74 		return 0;
75 	}
76 
77 	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
78 		rc, wintype, credits);
79 
80 	return -EIO;
81 }
82 
83 /*
84  * Deallocate VAS window hcall.
85  */
86 static int h_deallocate_vas_window(u64 winid)
87 {
88 	long rc;
89 
90 	do {
91 		rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
92 
93 		rc = hcall_return_busy_check(rc);
94 	} while (rc == H_BUSY);
95 
96 	if (rc == H_SUCCESS)
97 		return 0;
98 
99 	pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
100 		rc, winid);
101 	return -EIO;
102 }
103 
104 /*
105  * Modify VAS window.
106  * After the window is opened with allocate window hcall, configure it
107  * with flags and LPAR PID before using.
108  */
109 static int h_modify_vas_window(struct pseries_vas_window *win)
110 {
111 	long rc;
112 
113 	/*
114 	 * AMR value is not supported in Linux VAS implementation.
115 	 * The hypervisor ignores it if 0 is passed.
116 	 */
117 	do {
118 		rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
119 					win->vas_win.winid, win->pid, 0,
120 					VAS_MOD_WIN_FLAGS, 0);
121 
122 		rc = hcall_return_busy_check(rc);
123 	} while (rc == H_BUSY);
124 
125 	if (rc == H_SUCCESS)
126 		return 0;
127 
128 	pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
129 			rc, win->vas_win.winid, win->pid);
130 	return -EIO;
131 }
132 
133 /*
134  * This hcall is used to determine the capabilities from the hypervisor.
135  * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
136  * @query_type: If 0 is passed, the hypervisor returns the overall
137  *		capabilities which provides all feature(s) that are
138  *		available. Then query the hypervisor to get the
139  *		corresponding capabilities for the specific feature.
140  *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
141  *			and VAS GZIP Default capabilities.
142  *			H_QUERY_NX_CAPABILITIES provides NX GZIP
143  *			capabilities.
144  * @result: Return buffer to save capabilities.
145  */
146 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
147 {
148 	long rc;
149 
150 	rc = plpar_hcall_norets(hcall, query_type, result);
151 
152 	if (rc == H_SUCCESS)
153 		return 0;
154 
155 	/* H_FUNCTION means HV does not support VAS so don't print an error */
156 	if (rc != H_FUNCTION) {
157 		pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
158 			(hcall == H_QUERY_VAS_CAPABILITIES) ?
159 				"H_QUERY_VAS_CAPABILITIES" :
160 				"H_QUERY_NX_CAPABILITIES",
161 			rc, query_type, result);
162 	}
163 
164 	return -EIO;
165 }
166 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
167 
168 /*
169  * hcall to get fault CRB from the hypervisor.
170  */
171 static int h_get_nx_fault(u32 winid, u64 buffer)
172 {
173 	long rc;
174 
175 	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
176 
177 	if (rc == H_SUCCESS)
178 		return 0;
179 
180 	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
181 		rc, winid, buffer);
182 	return -EIO;
183 
184 }
185 
186 /*
187  * Handle the fault interrupt.
188  * When the fault interrupt is received for each window, query the
189  * hypervisor to get the fault CRB on the specific fault. Then
190  * process the CRB by updating CSB or send signal if the user space
191  * CSB is invalid.
192  * Note: The hypervisor forwards an interrupt for each fault request.
193  *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
194  */
195 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
196 {
197 	struct pseries_vas_window *txwin = data;
198 	struct coprocessor_request_block crb;
199 	struct vas_user_win_ref *tsk_ref;
200 	int rc;
201 
202 	rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
203 	if (!rc) {
204 		tsk_ref = &txwin->vas_win.task_ref;
205 		vas_dump_crb(&crb);
206 		vas_update_csb(&crb, tsk_ref);
207 	}
208 
209 	return IRQ_HANDLED;
210 }
211 
212 /*
213  * Allocate window and setup IRQ mapping.
214  */
215 static int allocate_setup_window(struct pseries_vas_window *txwin,
216 				 u64 *domain, u8 wintype)
217 {
218 	int rc;
219 
220 	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
221 	if (rc)
222 		return rc;
223 	/*
224 	 * On PowerVM, the hypervisor setup and forwards the fault
225 	 * interrupt per window. So the IRQ setup and fault handling
226 	 * will be done for each open window separately.
227 	 */
228 	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
229 	if (!txwin->fault_virq) {
230 		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
231 		rc = -EINVAL;
232 		goto out_win;
233 	}
234 
235 	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
236 				txwin->vas_win.winid);
237 	if (!txwin->name) {
238 		rc = -ENOMEM;
239 		goto out_irq;
240 	}
241 
242 	rc = request_threaded_irq(txwin->fault_virq, NULL,
243 				  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
244 				  txwin->name, txwin);
245 	if (rc) {
246 		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
247 		       txwin->vas_win.winid, txwin->fault_virq, rc);
248 		goto out_free;
249 	}
250 
251 	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
252 
253 	return 0;
254 out_free:
255 	kfree(txwin->name);
256 out_irq:
257 	irq_dispose_mapping(txwin->fault_virq);
258 out_win:
259 	h_deallocate_vas_window(txwin->vas_win.winid);
260 	return rc;
261 }
262 
263 static inline void free_irq_setup(struct pseries_vas_window *txwin)
264 {
265 	free_irq(txwin->fault_virq, txwin);
266 	kfree(txwin->name);
267 	irq_dispose_mapping(txwin->fault_virq);
268 }
269 
270 static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
271 					      enum vas_cop_type cop_type)
272 {
273 	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
274 	struct vas_cop_feat_caps *cop_feat_caps;
275 	struct vas_caps *caps;
276 	struct pseries_vas_window *txwin;
277 	int rc;
278 
279 	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
280 	if (!txwin)
281 		return ERR_PTR(-ENOMEM);
282 
283 	/*
284 	 * A VAS window can have many credits which means that many
285 	 * requests can be issued simultaneously. But the hypervisor
286 	 * restricts one credit per window.
287 	 * The hypervisor introduces 2 different types of credits:
288 	 * Default credit type (Uses normal priority FIFO):
289 	 *	A limited number of credits are assigned to partitions
290 	 *	based on processor entitlement. But these credits may be
291 	 *	over-committed on a system depends on whether the CPUs
292 	 *	are in shared or dedicated modes - that is, more requests
293 	 *	may be issued across the system than NX can service at
294 	 *	once which can result in paste command failure (RMA_busy).
295 	 *	Then the process has to resend requests or fall-back to
296 	 *	SW compression.
297 	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
298 	 *	To avoid NX HW contention, the system admins can assign
299 	 *	QoS credits for each LPAR so that this partition is
300 	 *	guaranteed access to NX resources. These credits are
301 	 *	assigned to partitions via the HMC.
302 	 *	Refer PAPR for more information.
303 	 *
304 	 * Allocate window with QoS credits if user requested. Otherwise
305 	 * default credits are used.
306 	 */
307 	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
308 		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
309 	else
310 		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
311 
312 	cop_feat_caps = &caps->caps;
313 
314 	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
315 			atomic_read(&cop_feat_caps->nr_total_credits)) {
316 		pr_err("Credits are not available to allocate window\n");
317 		rc = -EINVAL;
318 		goto out;
319 	}
320 
321 	if (vas_id == -1) {
322 		/*
323 		 * The user space is requesting to allocate a window on
324 		 * a VAS instance where the process is executing.
325 		 * On PowerVM, domain values are passed to the hypervisor
326 		 * to select VAS instance. Useful if the process is
327 		 * affinity to NUMA node.
328 		 * The hypervisor selects VAS instance if
329 		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
330 		 * The h_allocate_vas_window hcall is defined to take a
331 		 * domain values as specified by h_home_node_associativity,
332 		 * So no unpacking needs to be done.
333 		 */
334 		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
335 				  VPHN_FLAG_VCPU, smp_processor_id());
336 		if (rc != H_SUCCESS) {
337 			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
338 			goto out;
339 		}
340 	}
341 
342 	txwin->pid = mfspr(SPRN_PID);
343 
344 	/*
345 	 * Allocate / Deallocate window hcalls and setup / free IRQs
346 	 * have to be protected with mutex.
347 	 * Open VAS window: Allocate window hcall and setup IRQ
348 	 * Close VAS window: Deallocate window hcall and free IRQ
349 	 *	The hypervisor waits until all NX requests are
350 	 *	completed before closing the window. So expects OS
351 	 *	to handle NX faults, means IRQ can be freed only
352 	 *	after the deallocate window hcall is returned.
353 	 * So once the window is closed with deallocate hcall before
354 	 * the IRQ is freed, it can be assigned to new allocate
355 	 * hcall with the same fault IRQ by the hypervisor. It can
356 	 * result in setup IRQ fail for the new window since the
357 	 * same fault IRQ is not freed by the OS before.
358 	 */
359 	mutex_lock(&vas_pseries_mutex);
360 	if (migration_in_progress)
361 		rc = -EBUSY;
362 	else
363 		rc = allocate_setup_window(txwin, (u64 *)&domain[0],
364 				   cop_feat_caps->win_type);
365 	mutex_unlock(&vas_pseries_mutex);
366 	if (rc)
367 		goto out;
368 
369 	/*
370 	 * Modify window and it is ready to use.
371 	 */
372 	rc = h_modify_vas_window(txwin);
373 	if (!rc)
374 		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
375 	if (rc)
376 		goto out_free;
377 
378 	txwin->win_type = cop_feat_caps->win_type;
379 	mutex_lock(&vas_pseries_mutex);
380 	/*
381 	 * Possible to lose the acquired credit with DLPAR core
382 	 * removal after the window is opened. So if there are any
383 	 * closed windows (means with lost credits), do not give new
384 	 * window to user space. New windows will be opened only
385 	 * after the existing windows are reopened when credits are
386 	 * available.
387 	 */
388 	if (!caps->nr_close_wins) {
389 		list_add(&txwin->win_list, &caps->list);
390 		caps->nr_open_windows++;
391 		mutex_unlock(&vas_pseries_mutex);
392 		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
393 		return &txwin->vas_win;
394 	}
395 	mutex_unlock(&vas_pseries_mutex);
396 
397 	put_vas_user_win_ref(&txwin->vas_win.task_ref);
398 	rc = -EBUSY;
399 	pr_err("No credit is available to allocate window\n");
400 
401 out_free:
402 	/*
403 	 * Window is not operational. Free IRQ before closing
404 	 * window so that do not have to hold mutex.
405 	 */
406 	free_irq_setup(txwin);
407 	h_deallocate_vas_window(txwin->vas_win.winid);
408 out:
409 	atomic_dec(&cop_feat_caps->nr_used_credits);
410 	kfree(txwin);
411 	return ERR_PTR(rc);
412 }
413 
414 static u64 vas_paste_address(struct vas_window *vwin)
415 {
416 	struct pseries_vas_window *win;
417 
418 	win = container_of(vwin, struct pseries_vas_window, vas_win);
419 	return win->win_addr;
420 }
421 
422 static int deallocate_free_window(struct pseries_vas_window *win)
423 {
424 	int rc = 0;
425 
426 	/*
427 	 * The hypervisor waits for all requests including faults
428 	 * are processed before closing the window - Means all
429 	 * credits have to be returned. In the case of fault
430 	 * request, a credit is returned after OS issues
431 	 * H_GET_NX_FAULT hcall.
432 	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
433 	 * hcall.
434 	 */
435 	rc = h_deallocate_vas_window(win->vas_win.winid);
436 	if (!rc)
437 		free_irq_setup(win);
438 
439 	return rc;
440 }
441 
442 static int vas_deallocate_window(struct vas_window *vwin)
443 {
444 	struct pseries_vas_window *win;
445 	struct vas_cop_feat_caps *caps;
446 	int rc = 0;
447 
448 	if (!vwin)
449 		return -EINVAL;
450 
451 	win = container_of(vwin, struct pseries_vas_window, vas_win);
452 
453 	/* Should not happen */
454 	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
455 		pr_err("Window (%u): Invalid window type %u\n",
456 				vwin->winid, win->win_type);
457 		return -EINVAL;
458 	}
459 
460 	caps = &vascaps[win->win_type].caps;
461 	mutex_lock(&vas_pseries_mutex);
462 	/*
463 	 * VAS window is already closed in the hypervisor when
464 	 * lost the credit or with migration. So just remove the entry
465 	 * from the list, remove task references and free vas_window
466 	 * struct.
467 	 */
468 	if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
469 		!(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
470 		rc = deallocate_free_window(win);
471 		if (rc) {
472 			mutex_unlock(&vas_pseries_mutex);
473 			return rc;
474 		}
475 	} else
476 		vascaps[win->win_type].nr_close_wins--;
477 
478 	list_del(&win->win_list);
479 	atomic_dec(&caps->nr_used_credits);
480 	vascaps[win->win_type].nr_open_windows--;
481 	mutex_unlock(&vas_pseries_mutex);
482 
483 	put_vas_user_win_ref(&vwin->task_ref);
484 	mm_context_remove_vas_window(vwin->task_ref.mm);
485 
486 	kfree(win);
487 	return 0;
488 }
489 
490 static const struct vas_user_win_ops vops_pseries = {
491 	.open_win	= vas_allocate_window,	/* Open and configure window */
492 	.paste_addr	= vas_paste_address,	/* To do copy/paste */
493 	.close_win	= vas_deallocate_window, /* Close window */
494 };
495 
496 /*
497  * Supporting only nx-gzip coprocessor type now, but this API code
498  * extended to other coprocessor types later.
499  */
500 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
501 			     const char *name)
502 {
503 	int rc;
504 
505 	if (!copypaste_feat)
506 		return -ENOTSUPP;
507 
508 	rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
509 
510 	return rc;
511 }
512 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
513 
514 void vas_unregister_api_pseries(void)
515 {
516 	vas_unregister_coproc_api();
517 }
518 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
519 
520 /*
521  * Get the specific capabilities based on the feature type.
522  * Right now supports GZIP default and GZIP QoS capabilities.
523  */
524 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
525 				struct hv_vas_cop_feat_caps *hv_caps)
526 {
527 	struct vas_cop_feat_caps *caps;
528 	struct vas_caps *vcaps;
529 	int rc = 0;
530 
531 	vcaps = &vascaps[type];
532 	memset(vcaps, 0, sizeof(*vcaps));
533 	INIT_LIST_HEAD(&vcaps->list);
534 
535 	vcaps->feat = feat;
536 	caps = &vcaps->caps;
537 
538 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
539 					  (u64)virt_to_phys(hv_caps));
540 	if (rc)
541 		return rc;
542 
543 	caps->user_mode = hv_caps->user_mode;
544 	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
545 		pr_err("User space COPY/PASTE is not supported\n");
546 		return -ENOTSUPP;
547 	}
548 
549 	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
550 	caps->win_type = hv_caps->win_type;
551 	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
552 		pr_err("Unsupported window type %u\n", caps->win_type);
553 		return -EINVAL;
554 	}
555 	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
556 	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
557 	atomic_set(&caps->nr_total_credits,
558 		   be16_to_cpu(hv_caps->target_lpar_creds));
559 	if (feat == VAS_GZIP_DEF_FEAT) {
560 		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
561 
562 		if (caps->max_win_creds < DEF_WIN_CREDS) {
563 			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
564 			       DEF_WIN_CREDS, caps->max_win_creds);
565 			return -EINVAL;
566 		}
567 	}
568 
569 	rc = sysfs_add_vas_caps(caps);
570 	if (rc)
571 		return rc;
572 
573 	copypaste_feat = true;
574 
575 	return 0;
576 }
577 
578 /*
579  * VAS windows can be closed due to lost credits when the core is
580  * removed. So reopen them if credits are available due to DLPAR
581  * core add and set the window active status. When NX sees the page
582  * fault on the unmapped paste address, the kernel handles the fault
583  * by setting the remapping to new paste address if the window is
584  * active.
585  */
586 static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
587 				 bool migrate)
588 {
589 	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
590 	struct vas_cop_feat_caps *caps = &vcaps->caps;
591 	struct pseries_vas_window *win = NULL, *tmp;
592 	int rc, mv_ents = 0;
593 	int flag;
594 
595 	/*
596 	 * Nothing to do if there are no closed windows.
597 	 */
598 	if (!vcaps->nr_close_wins)
599 		return 0;
600 
601 	/*
602 	 * For the core removal, the hypervisor reduces the credits
603 	 * assigned to the LPAR and the kernel closes VAS windows
604 	 * in the hypervisor depends on reduced credits. The kernel
605 	 * uses LIFO (the last windows that are opened will be closed
606 	 * first) and expects to open in the same order when credits
607 	 * are available.
608 	 * For example, 40 windows are closed when the LPAR lost 2 cores
609 	 * (dedicated). If 1 core is added, this LPAR can have 20 more
610 	 * credits. It means the kernel can reopen 20 windows. So move
611 	 * 20 entries in the VAS windows lost and reopen next 20 windows.
612 	 * For partition migration, reopen all windows that are closed
613 	 * during resume.
614 	 */
615 	if ((vcaps->nr_close_wins > creds) && !migrate)
616 		mv_ents = vcaps->nr_close_wins - creds;
617 
618 	list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
619 		if (!mv_ents)
620 			break;
621 
622 		mv_ents--;
623 	}
624 
625 	/*
626 	 * Open windows if they are closed only with migration or
627 	 * DLPAR (lost credit) before.
628 	 */
629 	if (migrate)
630 		flag = VAS_WIN_MIGRATE_CLOSE;
631 	else
632 		flag = VAS_WIN_NO_CRED_CLOSE;
633 
634 	list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
635 		/*
636 		 * This window is closed with DLPAR and migration events.
637 		 * So reopen the window with the last event.
638 		 * The user space is not suspended with the current
639 		 * migration notifier. So the user space can issue DLPAR
640 		 * CPU hotplug while migration in progress. In this case
641 		 * this window will be opened with the last event.
642 		 */
643 		if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
644 			(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
645 			win->vas_win.status &= ~flag;
646 			continue;
647 		}
648 
649 		/*
650 		 * Nothing to do on this window if it is not closed
651 		 * with this flag
652 		 */
653 		if (!(win->vas_win.status & flag))
654 			continue;
655 
656 		rc = allocate_setup_window(win, (u64 *)&domain[0],
657 					   caps->win_type);
658 		if (rc)
659 			return rc;
660 
661 		rc = h_modify_vas_window(win);
662 		if (rc)
663 			goto out;
664 
665 		mutex_lock(&win->vas_win.task_ref.mmap_mutex);
666 		/*
667 		 * Set window status to active
668 		 */
669 		win->vas_win.status &= ~flag;
670 		mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
671 		win->win_type = caps->win_type;
672 		if (!--vcaps->nr_close_wins)
673 			break;
674 	}
675 
676 	return 0;
677 out:
678 	/*
679 	 * Window modify HCALL failed. So close the window to the
680 	 * hypervisor and return.
681 	 */
682 	free_irq_setup(win);
683 	h_deallocate_vas_window(win->vas_win.winid);
684 	return rc;
685 }
686 
687 /*
688  * The hypervisor reduces the available credits if the LPAR lost core. It
689  * means the excessive windows should not be active and the user space
690  * should not be using these windows to send compression requests to NX.
691  * So the kernel closes the excessive windows and unmap the paste address
692  * such that the user space receives paste instruction failure. Then up to
693  * the user space to fall back to SW compression and manage with the
694  * existing windows.
695  */
696 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
697 									bool migrate)
698 {
699 	struct pseries_vas_window *win, *tmp;
700 	struct vas_user_win_ref *task_ref;
701 	struct vm_area_struct *vma;
702 	int rc = 0, flag;
703 
704 	if (migrate)
705 		flag = VAS_WIN_MIGRATE_CLOSE;
706 	else
707 		flag = VAS_WIN_NO_CRED_CLOSE;
708 
709 	list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
710 		/*
711 		 * This window is already closed due to lost credit
712 		 * or for migration before. Go for next window.
713 		 * For migration, nothing to do since this window
714 		 * closed for DLPAR and will be reopened even on
715 		 * the destination system with other DLPAR operation.
716 		 */
717 		if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
718 			(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
719 			win->vas_win.status |= flag;
720 			continue;
721 		}
722 
723 		task_ref = &win->vas_win.task_ref;
724 		mutex_lock(&task_ref->mmap_mutex);
725 		vma = task_ref->vma;
726 		/*
727 		 * Number of available credits are reduced, So select
728 		 * and close windows.
729 		 */
730 		win->vas_win.status |= flag;
731 
732 		mmap_write_lock(task_ref->mm);
733 		/*
734 		 * vma is set in the original mapping. But this mapping
735 		 * is done with mmap() after the window is opened with ioctl.
736 		 * so we may not see the original mapping if the core remove
737 		 * is done before the original mmap() and after the ioctl.
738 		 */
739 		if (vma)
740 			zap_page_range(vma, vma->vm_start,
741 					vma->vm_end - vma->vm_start);
742 
743 		mmap_write_unlock(task_ref->mm);
744 		mutex_unlock(&task_ref->mmap_mutex);
745 		/*
746 		 * Close VAS window in the hypervisor, but do not
747 		 * free vas_window struct since it may be reused
748 		 * when the credit is available later (DLPAR with
749 		 * adding cores). This struct will be used
750 		 * later when the process issued with close(FD).
751 		 */
752 		rc = deallocate_free_window(win);
753 		/*
754 		 * This failure is from the hypervisor.
755 		 * No way to stop migration for these failures.
756 		 * So ignore error and continue closing other windows.
757 		 */
758 		if (rc && !migrate)
759 			return rc;
760 
761 		vcap->nr_close_wins++;
762 
763 		/*
764 		 * For migration, do not depend on lpar_creds in case if
765 		 * mismatch with the hypervisor value (should not happen).
766 		 * So close all active windows in the list and will be
767 		 * reopened windows based on the new lpar_creds on the
768 		 * destination system during resume.
769 		 */
770 		if (!migrate && !--excess_creds)
771 			break;
772 	}
773 
774 	return 0;
775 }
776 
777 /*
778  * Get new VAS capabilities when the core add/removal configuration
779  * changes. Reconfig window configurations based on the credits
780  * availability from this new capabilities.
781  */
782 int vas_reconfig_capabilties(u8 type, int new_nr_creds)
783 {
784 	struct vas_cop_feat_caps *caps;
785 	int old_nr_creds;
786 	struct vas_caps *vcaps;
787 	int rc = 0, nr_active_wins;
788 
789 	if (type >= VAS_MAX_FEAT_TYPE) {
790 		pr_err("Invalid credit type %d\n", type);
791 		return -EINVAL;
792 	}
793 
794 	vcaps = &vascaps[type];
795 	caps = &vcaps->caps;
796 
797 	mutex_lock(&vas_pseries_mutex);
798 
799 	old_nr_creds = atomic_read(&caps->nr_total_credits);
800 
801 	atomic_set(&caps->nr_total_credits, new_nr_creds);
802 	/*
803 	 * The total number of available credits may be decreased or
804 	 * increased with DLPAR operation. Means some windows have to be
805 	 * closed / reopened. Hold the vas_pseries_mutex so that the
806 	 * the user space can not open new windows.
807 	 */
808 	if (old_nr_creds <  new_nr_creds) {
809 		/*
810 		 * If the existing target credits is less than the new
811 		 * target, reopen windows if they are closed due to
812 		 * the previous DLPAR (core removal).
813 		 */
814 		rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
815 					   false);
816 	} else {
817 		/*
818 		 * # active windows is more than new LPAR available
819 		 * credits. So close the excessive windows.
820 		 * On pseries, each window will have 1 credit.
821 		 */
822 		nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
823 		if (nr_active_wins > new_nr_creds)
824 			rc = reconfig_close_windows(vcaps,
825 					nr_active_wins - new_nr_creds,
826 					false);
827 	}
828 
829 	mutex_unlock(&vas_pseries_mutex);
830 	return rc;
831 }
832 /*
833  * Total number of default credits available (target_credits)
834  * in LPAR depends on number of cores configured. It varies based on
835  * whether processors are in shared mode or dedicated mode.
836  * Get the notifier when CPU configuration is changed with DLPAR
837  * operation so that get the new target_credits (vas default capabilities)
838  * and then update the existing windows usage if needed.
839  */
840 static int pseries_vas_notifier(struct notifier_block *nb,
841 				unsigned long action, void *data)
842 {
843 	struct of_reconfig_data *rd = data;
844 	struct device_node *dn = rd->dn;
845 	const __be32 *intserv = NULL;
846 	int new_nr_creds, len, rc = 0;
847 
848 	if ((action == OF_RECONFIG_ATTACH_NODE) ||
849 		(action == OF_RECONFIG_DETACH_NODE))
850 		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
851 					  &len);
852 	/*
853 	 * Processor config is not changed
854 	 */
855 	if (!intserv)
856 		return NOTIFY_OK;
857 
858 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
859 					vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
860 					(u64)virt_to_phys(&hv_cop_caps));
861 	if (!rc) {
862 		new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
863 		rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
864 						new_nr_creds);
865 	}
866 
867 	if (rc)
868 		pr_err("Failed reconfig VAS capabilities with DLPAR\n");
869 
870 	return rc;
871 }
872 
873 static struct notifier_block pseries_vas_nb = {
874 	.notifier_call = pseries_vas_notifier,
875 };
876 
877 /*
878  * For LPM, all windows have to be closed on the source partition
879  * before migration and reopen them on the destination partition
880  * after migration. So closing windows during suspend and
881  * reopen them during resume.
882  */
883 int vas_migration_handler(int action)
884 {
885 	struct vas_cop_feat_caps *caps;
886 	int old_nr_creds, new_nr_creds = 0;
887 	struct vas_caps *vcaps;
888 	int i, rc = 0;
889 
890 	/*
891 	 * NX-GZIP is not enabled. Nothing to do for migration.
892 	 */
893 	if (!copypaste_feat)
894 		return rc;
895 
896 	mutex_lock(&vas_pseries_mutex);
897 
898 	if (action == VAS_SUSPEND)
899 		migration_in_progress = true;
900 	else
901 		migration_in_progress = false;
902 
903 	for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
904 		vcaps = &vascaps[i];
905 		caps = &vcaps->caps;
906 		old_nr_creds = atomic_read(&caps->nr_total_credits);
907 
908 		rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
909 					      vcaps->feat,
910 					      (u64)virt_to_phys(&hv_cop_caps));
911 		if (!rc) {
912 			new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
913 			/*
914 			 * Should not happen. But incase print messages, close
915 			 * all windows in the list during suspend and reopen
916 			 * windows based on new lpar_creds on the destination
917 			 * system.
918 			 */
919 			if (old_nr_creds != new_nr_creds) {
920 				pr_err("Target credits mismatch with the hypervisor\n");
921 				pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
922 					action, old_nr_creds, new_nr_creds);
923 				pr_err("Used creds: %d, Active creds: %d\n",
924 					atomic_read(&caps->nr_used_credits),
925 					vcaps->nr_open_windows - vcaps->nr_close_wins);
926 			}
927 		} else {
928 			pr_err("state(%d): Get VAS capabilities failed with %d\n",
929 				action, rc);
930 			/*
931 			 * We can not stop migration with the current lpm
932 			 * implementation. So continue closing all windows in
933 			 * the list (during suspend) and return without
934 			 * opening windows (during resume) if VAS capabilities
935 			 * HCALL failed.
936 			 */
937 			if (action == VAS_RESUME)
938 				goto out;
939 		}
940 
941 		switch (action) {
942 		case VAS_SUSPEND:
943 			rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
944 							true);
945 			break;
946 		case VAS_RESUME:
947 			atomic_set(&caps->nr_total_credits, new_nr_creds);
948 			rc = reconfig_open_windows(vcaps, new_nr_creds, true);
949 			break;
950 		default:
951 			/* should not happen */
952 			pr_err("Invalid migration action %d\n", action);
953 			rc = -EINVAL;
954 			goto out;
955 		}
956 
957 		/*
958 		 * Ignore errors during suspend and return for resume.
959 		 */
960 		if (rc && (action == VAS_RESUME))
961 			goto out;
962 	}
963 
964 out:
965 	mutex_unlock(&vas_pseries_mutex);
966 	return rc;
967 }
968 
969 static int __init pseries_vas_init(void)
970 {
971 	struct hv_vas_all_caps *hv_caps;
972 	int rc = 0;
973 
974 	/*
975 	 * Linux supports user space COPY/PASTE only with Radix
976 	 */
977 	if (!radix_enabled()) {
978 		pr_err("API is supported only with radix page tables\n");
979 		return -ENOTSUPP;
980 	}
981 
982 	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
983 	if (!hv_caps)
984 		return -ENOMEM;
985 	/*
986 	 * Get VAS overall capabilities by passing 0 to feature type.
987 	 */
988 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
989 					  (u64)virt_to_phys(hv_caps));
990 	if (rc)
991 		goto out;
992 
993 	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
994 	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
995 
996 	sysfs_pseries_vas_init(&caps_all);
997 
998 	/*
999 	 * QOS capabilities available
1000 	 */
1001 	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
1002 		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
1003 					  VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
1004 
1005 		if (rc)
1006 			goto out;
1007 	}
1008 	/*
1009 	 * Default capabilities available
1010 	 */
1011 	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
1012 		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
1013 					  VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
1014 
1015 	if (!rc && copypaste_feat) {
1016 		if (firmware_has_feature(FW_FEATURE_LPAR))
1017 			of_reconfig_notifier_register(&pseries_vas_nb);
1018 
1019 		pr_info("GZIP feature is available\n");
1020 	} else {
1021 		/*
1022 		 * Should not happen, but only when get default
1023 		 * capabilities HCALL failed. So disable copy paste
1024 		 * feature.
1025 		 */
1026 		copypaste_feat = false;
1027 	}
1028 
1029 out:
1030 	kfree(hv_caps);
1031 	return rc;
1032 }
1033 machine_device_initcall(pseries, pseries_vas_init);
1034