xref: /linux/arch/powerpc/platforms/pseries/vas.c (revision b903737bc522e0ef3f45a2a60c364ff547572c9b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright 2020-21 IBM Corp.
4  */
5 
6 #define pr_fmt(fmt) "vas: " fmt
7 
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/types.h>
12 #include <linux/delay.h>
13 #include <linux/slab.h>
14 #include <linux/interrupt.h>
15 #include <linux/irqdomain.h>
16 #include <asm/machdep.h>
17 #include <asm/hvcall.h>
18 #include <asm/plpar_wrappers.h>
19 #include <asm/vas.h>
20 #include "vas.h"
21 
/*
 * Window address value that h_allocate_vas_window() treats as
 * "COPY/PASTE is not supported".
 */
#define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
/* Value used to initialise the domain list passed to the allocate hcall */
#define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
/* The hypervisor allows one credit per window right now */
#define DEF_WIN_CREDS		1

/* Overall VAS capabilities, filled in by pseries_vas_init() */
static struct vas_all_caps caps_all;
/*
 * Set by get_vas_capabilities() once a feature's capabilities were read
 * and added to sysfs; vas_register_api_pseries() refuses to register
 * while this is false.
 */
static bool copypaste_feat;

/* Per feature type capabilities and window bookkeeping */
static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
/*
 * Taken around window allocate/deallocate with IRQ setup/free, around
 * updates of the per-feature window lists and counters, and during
 * capability reconfiguration.
 */
static DEFINE_MUTEX(vas_pseries_mutex);
32 
33 static long hcall_return_busy_check(long rc)
34 {
35 	/* Check if we are stalled for some time */
36 	if (H_IS_LONG_BUSY(rc)) {
37 		msleep(get_longbusy_msecs(rc));
38 		rc = H_BUSY;
39 	} else if (rc == H_BUSY) {
40 		cond_resched();
41 	}
42 
43 	return rc;
44 }
45 
46 /*
47  * Allocate VAS window hcall
48  */
49 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
50 				     u8 wintype, u16 credits)
51 {
52 	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
53 	long rc;
54 
55 	do {
56 		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
57 				  credits, domain[0], domain[1], domain[2],
58 				  domain[3], domain[4], domain[5]);
59 
60 		rc = hcall_return_busy_check(rc);
61 	} while (rc == H_BUSY);
62 
63 	if (rc == H_SUCCESS) {
64 		if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
65 			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
66 			return -ENOTSUPP;
67 		}
68 		win->vas_win.winid = retbuf[0];
69 		win->win_addr = retbuf[1];
70 		win->complete_irq = retbuf[2];
71 		win->fault_irq = retbuf[3];
72 		return 0;
73 	}
74 
75 	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
76 		rc, wintype, credits);
77 
78 	return -EIO;
79 }
80 
81 /*
82  * Deallocate VAS window hcall.
83  */
84 static int h_deallocate_vas_window(u64 winid)
85 {
86 	long rc;
87 
88 	do {
89 		rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
90 
91 		rc = hcall_return_busy_check(rc);
92 	} while (rc == H_BUSY);
93 
94 	if (rc == H_SUCCESS)
95 		return 0;
96 
97 	pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
98 		rc, winid);
99 	return -EIO;
100 }
101 
102 /*
103  * Modify VAS window.
104  * After the window is opened with allocate window hcall, configure it
105  * with flags and LPAR PID before using.
106  */
107 static int h_modify_vas_window(struct pseries_vas_window *win)
108 {
109 	long rc;
110 
111 	/*
112 	 * AMR value is not supported in Linux VAS implementation.
113 	 * The hypervisor ignores it if 0 is passed.
114 	 */
115 	do {
116 		rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
117 					win->vas_win.winid, win->pid, 0,
118 					VAS_MOD_WIN_FLAGS, 0);
119 
120 		rc = hcall_return_busy_check(rc);
121 	} while (rc == H_BUSY);
122 
123 	if (rc == H_SUCCESS)
124 		return 0;
125 
126 	pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
127 			rc, win->vas_win.winid, win->pid);
128 	return -EIO;
129 }
130 
131 /*
132  * This hcall is used to determine the capabilities from the hypervisor.
133  * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
134  * @query_type: If 0 is passed, the hypervisor returns the overall
135  *		capabilities which provides all feature(s) that are
136  *		available. Then query the hypervisor to get the
137  *		corresponding capabilities for the specific feature.
138  *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
139  *			and VAS GZIP Default capabilities.
140  *			H_QUERY_NX_CAPABILITIES provides NX GZIP
141  *			capabilities.
142  * @result: Return buffer to save capabilities.
143  */
144 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
145 {
146 	long rc;
147 
148 	rc = plpar_hcall_norets(hcall, query_type, result);
149 
150 	if (rc == H_SUCCESS)
151 		return 0;
152 
153 	/* H_FUNCTION means HV does not support VAS so don't print an error */
154 	if (rc != H_FUNCTION) {
155 		pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
156 			(hcall == H_QUERY_VAS_CAPABILITIES) ?
157 				"H_QUERY_VAS_CAPABILITIES" :
158 				"H_QUERY_NX_CAPABILITIES",
159 			rc, query_type, result);
160 	}
161 
162 	return -EIO;
163 }
164 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
165 
166 /*
167  * hcall to get fault CRB from the hypervisor.
168  */
169 static int h_get_nx_fault(u32 winid, u64 buffer)
170 {
171 	long rc;
172 
173 	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
174 
175 	if (rc == H_SUCCESS)
176 		return 0;
177 
178 	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
179 		rc, winid, buffer);
180 	return -EIO;
181 
182 }
183 
184 /*
185  * Handle the fault interrupt.
186  * When the fault interrupt is received for each window, query the
187  * hypervisor to get the fault CRB on the specific fault. Then
188  * process the CRB by updating CSB or send signal if the user space
189  * CSB is invalid.
190  * Note: The hypervisor forwards an interrupt for each fault request.
191  *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
192  */
193 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
194 {
195 	struct pseries_vas_window *txwin = data;
196 	struct coprocessor_request_block crb;
197 	struct vas_user_win_ref *tsk_ref;
198 	int rc;
199 
200 	rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
201 	if (!rc) {
202 		tsk_ref = &txwin->vas_win.task_ref;
203 		vas_dump_crb(&crb);
204 		vas_update_csb(&crb, tsk_ref);
205 	}
206 
207 	return IRQ_HANDLED;
208 }
209 
210 /*
211  * Allocate window and setup IRQ mapping.
212  */
213 static int allocate_setup_window(struct pseries_vas_window *txwin,
214 				 u64 *domain, u8 wintype)
215 {
216 	int rc;
217 
218 	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
219 	if (rc)
220 		return rc;
221 	/*
222 	 * On PowerVM, the hypervisor setup and forwards the fault
223 	 * interrupt per window. So the IRQ setup and fault handling
224 	 * will be done for each open window separately.
225 	 */
226 	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
227 	if (!txwin->fault_virq) {
228 		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
229 		rc = -EINVAL;
230 		goto out_win;
231 	}
232 
233 	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
234 				txwin->vas_win.winid);
235 	if (!txwin->name) {
236 		rc = -ENOMEM;
237 		goto out_irq;
238 	}
239 
240 	rc = request_threaded_irq(txwin->fault_virq, NULL,
241 				  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
242 				  txwin->name, txwin);
243 	if (rc) {
244 		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
245 		       txwin->vas_win.winid, txwin->fault_virq, rc);
246 		goto out_free;
247 	}
248 
249 	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
250 
251 	return 0;
252 out_free:
253 	kfree(txwin->name);
254 out_irq:
255 	irq_dispose_mapping(txwin->fault_virq);
256 out_win:
257 	h_deallocate_vas_window(txwin->vas_win.winid);
258 	return rc;
259 }
260 
261 static inline void free_irq_setup(struct pseries_vas_window *txwin)
262 {
263 	free_irq(txwin->fault_virq, txwin);
264 	kfree(txwin->name);
265 	irq_dispose_mapping(txwin->fault_virq);
266 }
267 
268 static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
269 					      enum vas_cop_type cop_type)
270 {
271 	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
272 	struct vas_cop_feat_caps *cop_feat_caps;
273 	struct vas_caps *caps;
274 	struct pseries_vas_window *txwin;
275 	int rc;
276 
277 	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
278 	if (!txwin)
279 		return ERR_PTR(-ENOMEM);
280 
281 	/*
282 	 * A VAS window can have many credits which means that many
283 	 * requests can be issued simultaneously. But the hypervisor
284 	 * restricts one credit per window.
285 	 * The hypervisor introduces 2 different types of credits:
286 	 * Default credit type (Uses normal priority FIFO):
287 	 *	A limited number of credits are assigned to partitions
288 	 *	based on processor entitlement. But these credits may be
289 	 *	over-committed on a system depends on whether the CPUs
290 	 *	are in shared or dedicated modes - that is, more requests
291 	 *	may be issued across the system than NX can service at
292 	 *	once which can result in paste command failure (RMA_busy).
293 	 *	Then the process has to resend requests or fall-back to
294 	 *	SW compression.
295 	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
296 	 *	To avoid NX HW contention, the system admins can assign
297 	 *	QoS credits for each LPAR so that this partition is
298 	 *	guaranteed access to NX resources. These credits are
299 	 *	assigned to partitions via the HMC.
300 	 *	Refer PAPR for more information.
301 	 *
302 	 * Allocate window with QoS credits if user requested. Otherwise
303 	 * default credits are used.
304 	 */
305 	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
306 		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
307 	else
308 		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
309 
310 	cop_feat_caps = &caps->caps;
311 
312 	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
313 			atomic_read(&cop_feat_caps->nr_total_credits)) {
314 		pr_err("Credits are not available to allocate window\n");
315 		rc = -EINVAL;
316 		goto out;
317 	}
318 
319 	if (vas_id == -1) {
320 		/*
321 		 * The user space is requesting to allocate a window on
322 		 * a VAS instance where the process is executing.
323 		 * On PowerVM, domain values are passed to the hypervisor
324 		 * to select VAS instance. Useful if the process is
325 		 * affinity to NUMA node.
326 		 * The hypervisor selects VAS instance if
327 		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
328 		 * The h_allocate_vas_window hcall is defined to take a
329 		 * domain values as specified by h_home_node_associativity,
330 		 * So no unpacking needs to be done.
331 		 */
332 		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
333 				  VPHN_FLAG_VCPU, smp_processor_id());
334 		if (rc != H_SUCCESS) {
335 			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
336 			goto out;
337 		}
338 	}
339 
340 	txwin->pid = mfspr(SPRN_PID);
341 
342 	/*
343 	 * Allocate / Deallocate window hcalls and setup / free IRQs
344 	 * have to be protected with mutex.
345 	 * Open VAS window: Allocate window hcall and setup IRQ
346 	 * Close VAS window: Deallocate window hcall and free IRQ
347 	 *	The hypervisor waits until all NX requests are
348 	 *	completed before closing the window. So expects OS
349 	 *	to handle NX faults, means IRQ can be freed only
350 	 *	after the deallocate window hcall is returned.
351 	 * So once the window is closed with deallocate hcall before
352 	 * the IRQ is freed, it can be assigned to new allocate
353 	 * hcall with the same fault IRQ by the hypervisor. It can
354 	 * result in setup IRQ fail for the new window since the
355 	 * same fault IRQ is not freed by the OS before.
356 	 */
357 	mutex_lock(&vas_pseries_mutex);
358 	rc = allocate_setup_window(txwin, (u64 *)&domain[0],
359 				   cop_feat_caps->win_type);
360 	mutex_unlock(&vas_pseries_mutex);
361 	if (rc)
362 		goto out;
363 
364 	/*
365 	 * Modify window and it is ready to use.
366 	 */
367 	rc = h_modify_vas_window(txwin);
368 	if (!rc)
369 		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
370 	if (rc)
371 		goto out_free;
372 
373 	txwin->win_type = cop_feat_caps->win_type;
374 	mutex_lock(&vas_pseries_mutex);
375 	/*
376 	 * Possible to lose the acquired credit with DLPAR core
377 	 * removal after the window is opened. So if there are any
378 	 * closed windows (means with lost credits), do not give new
379 	 * window to user space. New windows will be opened only
380 	 * after the existing windows are reopened when credits are
381 	 * available.
382 	 */
383 	if (!caps->nr_close_wins) {
384 		list_add(&txwin->win_list, &caps->list);
385 		caps->nr_open_windows++;
386 		mutex_unlock(&vas_pseries_mutex);
387 		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
388 		return &txwin->vas_win;
389 	}
390 	mutex_unlock(&vas_pseries_mutex);
391 
392 	put_vas_user_win_ref(&txwin->vas_win.task_ref);
393 	rc = -EBUSY;
394 	pr_err("No credit is available to allocate window\n");
395 
396 out_free:
397 	/*
398 	 * Window is not operational. Free IRQ before closing
399 	 * window so that do not have to hold mutex.
400 	 */
401 	free_irq_setup(txwin);
402 	h_deallocate_vas_window(txwin->vas_win.winid);
403 out:
404 	atomic_dec(&cop_feat_caps->nr_used_credits);
405 	kfree(txwin);
406 	return ERR_PTR(rc);
407 }
408 
409 static u64 vas_paste_address(struct vas_window *vwin)
410 {
411 	struct pseries_vas_window *win;
412 
413 	win = container_of(vwin, struct pseries_vas_window, vas_win);
414 	return win->win_addr;
415 }
416 
417 static int deallocate_free_window(struct pseries_vas_window *win)
418 {
419 	int rc = 0;
420 
421 	/*
422 	 * The hypervisor waits for all requests including faults
423 	 * are processed before closing the window - Means all
424 	 * credits have to be returned. In the case of fault
425 	 * request, a credit is returned after OS issues
426 	 * H_GET_NX_FAULT hcall.
427 	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
428 	 * hcall.
429 	 */
430 	rc = h_deallocate_vas_window(win->vas_win.winid);
431 	if (!rc)
432 		free_irq_setup(win);
433 
434 	return rc;
435 }
436 
437 static int vas_deallocate_window(struct vas_window *vwin)
438 {
439 	struct pseries_vas_window *win;
440 	struct vas_cop_feat_caps *caps;
441 	int rc = 0;
442 
443 	if (!vwin)
444 		return -EINVAL;
445 
446 	win = container_of(vwin, struct pseries_vas_window, vas_win);
447 
448 	/* Should not happen */
449 	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
450 		pr_err("Window (%u): Invalid window type %u\n",
451 				vwin->winid, win->win_type);
452 		return -EINVAL;
453 	}
454 
455 	caps = &vascaps[win->win_type].caps;
456 	mutex_lock(&vas_pseries_mutex);
457 	/*
458 	 * VAS window is already closed in the hypervisor when
459 	 * lost the credit. So just remove the entry from
460 	 * the list, remove task references and free vas_window
461 	 * struct.
462 	 */
463 	if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) {
464 		rc = deallocate_free_window(win);
465 		if (rc) {
466 			mutex_unlock(&vas_pseries_mutex);
467 			return rc;
468 		}
469 	} else
470 		vascaps[win->win_type].nr_close_wins--;
471 
472 	list_del(&win->win_list);
473 	atomic_dec(&caps->nr_used_credits);
474 	vascaps[win->win_type].nr_open_windows--;
475 	mutex_unlock(&vas_pseries_mutex);
476 
477 	put_vas_user_win_ref(&vwin->task_ref);
478 	mm_context_remove_vas_window(vwin->task_ref.mm);
479 
480 	kfree(win);
481 	return 0;
482 }
483 
/*
 * Window operations handed to vas_register_coproc_api() by
 * vas_register_api_pseries().
 */
static const struct vas_user_win_ops vops_pseries = {
	.open_win	= vas_allocate_window,	/* Open and configure window */
	.paste_addr	= vas_paste_address,	/* To do copy/paste */
	.close_win	= vas_deallocate_window, /* Close window */
};
489 
490 /*
491  * Supporting only nx-gzip coprocessor type now, but this API code
492  * extended to other coprocessor types later.
493  */
494 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
495 			     const char *name)
496 {
497 	int rc;
498 
499 	if (!copypaste_feat)
500 		return -ENOTSUPP;
501 
502 	rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
503 
504 	return rc;
505 }
506 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
507 
/*
 * Counterpart of vas_register_api_pseries(): forwards to
 * vas_unregister_coproc_api().
 */
void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
513 
514 /*
515  * Get the specific capabilities based on the feature type.
516  * Right now supports GZIP default and GZIP QoS capabilities.
517  */
518 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
519 				struct hv_vas_cop_feat_caps *hv_caps)
520 {
521 	struct vas_cop_feat_caps *caps;
522 	struct vas_caps *vcaps;
523 	int rc = 0;
524 
525 	vcaps = &vascaps[type];
526 	memset(vcaps, 0, sizeof(*vcaps));
527 	INIT_LIST_HEAD(&vcaps->list);
528 
529 	vcaps->feat = feat;
530 	caps = &vcaps->caps;
531 
532 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
533 					  (u64)virt_to_phys(hv_caps));
534 	if (rc)
535 		return rc;
536 
537 	caps->user_mode = hv_caps->user_mode;
538 	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
539 		pr_err("User space COPY/PASTE is not supported\n");
540 		return -ENOTSUPP;
541 	}
542 
543 	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
544 	caps->win_type = hv_caps->win_type;
545 	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
546 		pr_err("Unsupported window type %u\n", caps->win_type);
547 		return -EINVAL;
548 	}
549 	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
550 	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
551 	atomic_set(&caps->nr_total_credits,
552 		   be16_to_cpu(hv_caps->target_lpar_creds));
553 	if (feat == VAS_GZIP_DEF_FEAT) {
554 		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
555 
556 		if (caps->max_win_creds < DEF_WIN_CREDS) {
557 			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
558 			       DEF_WIN_CREDS, caps->max_win_creds);
559 			return -EINVAL;
560 		}
561 	}
562 
563 	rc = sysfs_add_vas_caps(caps);
564 	if (rc)
565 		return rc;
566 
567 	copypaste_feat = true;
568 
569 	return 0;
570 }
571 
/*
 * VAS windows can be closed due to lost credits when the core is
 * removed. So reopen them if credits are available due to DLPAR
 * core add and set the window active status. When NX sees the page
 * fault on the unmapped paste address, the kernel handles the fault
 * by setting the remapping to new paste address if the window is
 * active.
 *
 * @vcaps: per-feature capabilities whose window list is walked.
 * @creds: number of credits that became available.
 *
 * Returns 0 on success (including when there is nothing to reopen) or
 * the error of the first window that could not be reopened; windows
 * after that one are not attempted.
 *
 * NOTE(review): the only caller visible here,
 * vas_reconfig_capabilties(), holds vas_pseries_mutex around this call.
 */
static int reconfig_open_windows(struct vas_caps *vcaps, int creds)
{
	/* Only domain[0] is VAS_DEFAULT_DOMAIN_ID; the other entries are 0 */
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *caps = &vcaps->caps;
	struct pseries_vas_window *win = NULL, *tmp;
	int rc, mv_ents = 0;

	/*
	 * Nothing to do if there are no closed windows.
	 */
	if (!vcaps->nr_close_wins)
		return 0;

	/*
	 * For the core removal, the hypervisor reduces the credits
	 * assigned to the LPAR and the kernel closes VAS windows
	 * in the hypervisor depends on reduced credits. The kernel
	 * uses LIFO (the last windows that are opened will be closed
	 * first) and expects to open in the same order when credits
	 * are available.
	 * For example, 40 windows are closed when the LPAR lost 2 cores
	 * (dedicated). If 1 core is added, this LPAR can have 20 more
	 * credits. It means the kernel can reopen 20 windows. So move
	 * 20 entries in the VAS windows lost and reopen next 20 windows.
	 */
	if (vcaps->nr_close_wins > creds)
		mv_ents = vcaps->nr_close_wins - creds;

	/*
	 * Skip the first mv_ents entries; the loop below continues from
	 * wherever this one leaves the cursor.
	 */
	list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
		if (!mv_ents)
			break;

		mv_ents--;
	}

	list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
		/*
		 * Nothing to do on this window if it is not closed
		 * with VAS_WIN_NO_CRED_CLOSE
		 */
		if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE))
			continue;

		/* Reopen in the hypervisor and set up the fault IRQ again */
		rc = allocate_setup_window(win, (u64 *)&domain[0],
					   caps->win_type);
		if (rc)
			return rc;

		rc = h_modify_vas_window(win);
		if (rc)
			goto out;

		mutex_lock(&win->vas_win.task_ref.mmap_mutex);
		/*
		 * Set window status to active
		 */
		win->vas_win.status &= ~VAS_WIN_NO_CRED_CLOSE;
		mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
		win->win_type = caps->win_type;
		/* Stop as soon as no closed window is left */
		if (!--vcaps->nr_close_wins)
			break;
	}

	return 0;
out:
	/*
	 * Window modify HCALL failed. So close the window to the
	 * hypervisor and return.
	 */
	free_irq_setup(win);
	h_deallocate_vas_window(win->vas_win.winid);
	return rc;
}
653 
/*
 * The hypervisor reduces the available credits if the LPAR lost core. It
 * means the excessive windows should not be active and the user space
 * should not be using these windows to send compression requests to NX.
 * So the kernel closes the excessive windows and unmap the paste address
 * such that the user space receives paste instruction failure. Then up to
 * the user space to fall back to SW compression and manage with the
 * existing windows.
 *
 * @vcap:         per-feature capabilities whose window list is walked.
 * @excess_creds: number of windows to close.
 *
 * Returns 0 on success or the error from deallocate_free_window() for
 * the first window that could not be closed. That window has already
 * been marked VAS_WIN_NO_CRED_CLOSE and unmapped at that point, but
 * nr_close_wins is not incremented for it.
 *
 * NOTE(review): the only caller visible here,
 * vas_reconfig_capabilties(), holds vas_pseries_mutex around this call.
 */
static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds)
{
	struct pseries_vas_window *win, *tmp;
	struct vas_user_win_ref *task_ref;
	struct vm_area_struct *vma;
	int rc = 0;

	list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
		/*
		 * This window is already closed due to lost credit
		 * before. Go for next window.
		 */
		if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)
			continue;

		task_ref = &win->vas_win.task_ref;
		/* mmap_mutex covers both the vma pointer and the status */
		mutex_lock(&task_ref->mmap_mutex);
		vma = task_ref->vma;
		/*
		 * Number of available credits are reduced, So select
		 * and close windows.
		 */
		win->vas_win.status |= VAS_WIN_NO_CRED_CLOSE;

		mmap_write_lock(task_ref->mm);
		/*
		 * vma is set in the original mapping. But this mapping
		 * is done with mmap() after the window is opened with ioctl.
		 * so we may not see the original mapping if the core remove
		 * is done before the original mmap() and after the ioctl.
		 */
		if (vma)
			zap_page_range(vma, vma->vm_start,
					vma->vm_end - vma->vm_start);

		mmap_write_unlock(task_ref->mm);
		mutex_unlock(&task_ref->mmap_mutex);
		/*
		 * Close VAS window in the hypervisor, but do not
		 * free vas_window struct since it may be reused
		 * when the credit is available later (DLPAR with
		 * adding cores). This struct will be used
		 * later when the process issued with close(FD).
		 */
		rc = deallocate_free_window(win);
		if (rc)
			return rc;

		vcap->nr_close_wins++;

		/* Done once the requested number of windows is closed */
		if (!--excess_creds)
			break;
	}

	return 0;
}
719 
720 /*
721  * Get new VAS capabilities when the core add/removal configuration
722  * changes. Reconfig window configurations based on the credits
723  * availability from this new capabilities.
724  */
725 static int vas_reconfig_capabilties(u8 type)
726 {
727 	struct hv_vas_cop_feat_caps *hv_caps;
728 	struct vas_cop_feat_caps *caps;
729 	int old_nr_creds, new_nr_creds;
730 	struct vas_caps *vcaps;
731 	int rc = 0, nr_active_wins;
732 
733 	if (type >= VAS_MAX_FEAT_TYPE) {
734 		pr_err("Invalid credit type %d\n", type);
735 		return -EINVAL;
736 	}
737 
738 	vcaps = &vascaps[type];
739 	caps = &vcaps->caps;
740 
741 	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
742 	if (!hv_caps)
743 		return -ENOMEM;
744 
745 	mutex_lock(&vas_pseries_mutex);
746 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat,
747 				      (u64)virt_to_phys(hv_caps));
748 	if (rc)
749 		goto out;
750 
751 	new_nr_creds = be16_to_cpu(hv_caps->target_lpar_creds);
752 
753 	old_nr_creds = atomic_read(&caps->nr_total_credits);
754 
755 	atomic_set(&caps->nr_total_credits, new_nr_creds);
756 	/*
757 	 * The total number of available credits may be decreased or
758 	 * inceased with DLPAR operation. Means some windows have to be
759 	 * closed / reopened. Hold the vas_pseries_mutex so that the
760 	 * the user space can not open new windows.
761 	 */
762 	if (old_nr_creds <  new_nr_creds) {
763 		/*
764 		 * If the existing target credits is less than the new
765 		 * target, reopen windows if they are closed due to
766 		 * the previous DLPAR (core removal).
767 		 */
768 		rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds);
769 	} else {
770 		/*
771 		 * # active windows is more than new LPAR available
772 		 * credits. So close the excessive windows.
773 		 * On pseries, each window will have 1 credit.
774 		 */
775 		nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
776 		if (nr_active_wins > new_nr_creds)
777 			rc = reconfig_close_windows(vcaps,
778 					nr_active_wins - new_nr_creds);
779 	}
780 
781 out:
782 	mutex_unlock(&vas_pseries_mutex);
783 	kfree(hv_caps);
784 	return rc;
785 }
786 /*
787  * Total number of default credits available (target_credits)
788  * in LPAR depends on number of cores configured. It varies based on
789  * whether processors are in shared mode or dedicated mode.
790  * Get the notifier when CPU configuration is changed with DLPAR
791  * operation so that get the new target_credits (vas default capabilities)
792  * and then update the existing windows usage if needed.
793  */
794 static int pseries_vas_notifier(struct notifier_block *nb,
795 				unsigned long action, void *data)
796 {
797 	struct of_reconfig_data *rd = data;
798 	struct device_node *dn = rd->dn;
799 	const __be32 *intserv = NULL;
800 	int len, rc = 0;
801 
802 	if ((action == OF_RECONFIG_ATTACH_NODE) ||
803 		(action == OF_RECONFIG_DETACH_NODE))
804 		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
805 					  &len);
806 	/*
807 	 * Processor config is not changed
808 	 */
809 	if (!intserv)
810 		return NOTIFY_OK;
811 
812 	rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE);
813 	if (rc)
814 		pr_err("Failed reconfig VAS capabilities with DLPAR\n");
815 
816 	return rc;
817 }
818 
/* Registered with of_reconfig_notifier_register() in pseries_vas_init() */
static struct notifier_block pseries_vas_nb = {
	.notifier_call = pseries_vas_notifier,
};
822 
823 static int __init pseries_vas_init(void)
824 {
825 	struct hv_vas_cop_feat_caps *hv_cop_caps;
826 	struct hv_vas_all_caps *hv_caps;
827 	int rc;
828 
829 	/*
830 	 * Linux supports user space COPY/PASTE only with Radix
831 	 */
832 	if (!radix_enabled()) {
833 		pr_err("API is supported only with radix page tables\n");
834 		return -ENOTSUPP;
835 	}
836 
837 	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
838 	if (!hv_caps)
839 		return -ENOMEM;
840 	/*
841 	 * Get VAS overall capabilities by passing 0 to feature type.
842 	 */
843 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
844 					  (u64)virt_to_phys(hv_caps));
845 	if (rc)
846 		goto out;
847 
848 	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
849 	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
850 
851 	sysfs_pseries_vas_init(&caps_all);
852 
853 	hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
854 	if (!hv_cop_caps) {
855 		rc = -ENOMEM;
856 		goto out;
857 	}
858 	/*
859 	 * QOS capabilities available
860 	 */
861 	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
862 		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
863 					  VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
864 
865 		if (rc)
866 			goto out_cop;
867 	}
868 	/*
869 	 * Default capabilities available
870 	 */
871 	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
872 		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
873 					  VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
874 		if (rc)
875 			goto out_cop;
876 	}
877 
878 	if (copypaste_feat && firmware_has_feature(FW_FEATURE_LPAR))
879 		of_reconfig_notifier_register(&pseries_vas_nb);
880 
881 	pr_info("GZIP feature is available\n");
882 
883 out_cop:
884 	kfree(hv_cop_caps);
885 out:
886 	kfree(hv_caps);
887 	return rc;
888 }
889 machine_device_initcall(pseries, pseries_vas_init);
890