xref: /linux/arch/powerpc/platforms/pseries/mobility.c (revision 6cb44bef35ac11724ef22c5ae4f1bc607e2ef3d8)
1d2912cb1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2410bccf9SNathan Fontenot /*
3410bccf9SNathan Fontenot  * Support for Partition Mobility/Migration
4410bccf9SNathan Fontenot  *
5410bccf9SNathan Fontenot  * Copyright (C) 2010 Nathan Fontenot
6410bccf9SNathan Fontenot  * Copyright (C) 2010 IBM Corporation
7410bccf9SNathan Fontenot  */
8410bccf9SNathan Fontenot 
9494a66f3SNathan Lynch 
10494a66f3SNathan Lynch #define pr_fmt(fmt) "mobility: " fmt
11494a66f3SNathan Lynch 
12e59a175fSNathan Lynch #include <linux/cpu.h>
13410bccf9SNathan Fontenot #include <linux/kernel.h>
14410bccf9SNathan Fontenot #include <linux/kobject.h>
159327dc0aSNathan Lynch #include <linux/nmi.h>
16ccfb5bd7SNathan Lynch #include <linux/sched.h>
17410bccf9SNathan Fontenot #include <linux/smp.h>
18b56eade5SPaul Gortmaker #include <linux/stat.h>
199327dc0aSNathan Lynch #include <linux/stop_machine.h>
20410bccf9SNathan Fontenot #include <linux/completion.h>
21410bccf9SNathan Fontenot #include <linux/device.h>
22410bccf9SNathan Fontenot #include <linux/delay.h>
23410bccf9SNathan Fontenot #include <linux/slab.h>
245c35a02cSChristophe Leroy #include <linux/stringify.h>
25410bccf9SNathan Fontenot 
268e83e905SMichael Ellerman #include <asm/machdep.h>
27*6cb44befSDouglas Anderson #include <asm/nmi.h>
28410bccf9SNathan Fontenot #include <asm/rtas.h>
29410bccf9SNathan Fontenot #include "pseries.h"
3037e67648SHaren Myneni #include "vas.h"	/* vas_migration_handler() */
31e610a466SNathan Lynch #include "../../kernel/cacheinfo.h"
32410bccf9SNathan Fontenot 
/*
 * kobject anchoring the mobility sysfs interface.
 * NOTE(review): registration/usage is not visible in this part of the
 * file — confirm against the sysfs setup code below/elsewhere.
 */
static struct kobject *mobility_kobj;
34410bccf9SNathan Fontenot 
/*
 * Header of the ibm,update-properties RTAS work area, as laid out by
 * firmware. All fields are big-endian; property descriptors follow
 * this header in the same buffer.
 */
struct update_props_workarea {
	__be32 phandle;		/* phandle of the node being updated */
	__be32 state;		/* call state maintained by firmware across calls */
	__be64 reserved;
	__be32 nprops;		/* number of property descriptors in this buffer */
} __packed;
41410bccf9SNathan Fontenot 
42410bccf9SNathan Fontenot #define NODE_ACTION_MASK	0xff000000
43410bccf9SNathan Fontenot #define NODE_COUNT_MASK		0x00ffffff
44410bccf9SNathan Fontenot 
45410bccf9SNathan Fontenot #define DELETE_DT_NODE	0x01000000
46410bccf9SNathan Fontenot #define UPDATE_DT_NODE	0x02000000
47410bccf9SNathan Fontenot #define ADD_DT_NODE	0x03000000
48410bccf9SNathan Fontenot 
49762ec157SNathan Fontenot #define MIGRATION_SCOPE	(1)
50675d8ee6SJohn Allen #define PRRN_SCOPE -2
51762ec157SNathan Fontenot 
52118b1366SLaurent Dufour #ifdef CONFIG_PPC_WATCHDOG
/*
 * Tunable exposed as /proc/sys/kernel/nmi_wd_lpm_factor (see table
 * below). NOTE(review): the consumer of this value is not in this
 * file — presumably a watchdog-timeout scaling factor (percent)
 * applied around Live Partition Mobility; confirm in the watchdog
 * code.
 */
static unsigned int nmi_wd_lpm_factor = 200;
54118b1366SLaurent Dufour 
55118b1366SLaurent Dufour #ifdef CONFIG_SYSCTL
/* sysctl table exposing nmi_wd_lpm_factor under /proc/sys/kernel/. */
static struct ctl_table nmi_wd_lpm_factor_ctl_table[] = {
	{
		.procname	= "nmi_wd_lpm_factor",
		.data		= &nmi_wd_lpm_factor,
		.maxlen		= sizeof(int),
		.mode		= 0644,		/* root-writable, world-readable */
		.proc_handler	= proc_douintvec_minmax,
	},
	{}	/* sentinel */
};
66118b1366SLaurent Dufour 
/*
 * Boot-time registration of the nmi_wd_lpm_factor sysctl. The handle
 * returned by register_sysctl() is deliberately dropped: the table is
 * never unregistered.
 */
static int __init register_nmi_wd_lpm_factor_sysctl(void)
{
	register_sysctl("kernel", nmi_wd_lpm_factor_ctl_table);

	return 0;
}
device_initcall(register_nmi_wd_lpm_factor_sysctl);
74118b1366SLaurent Dufour #endif /* CONFIG_SYSCTL */
75118b1366SLaurent Dufour #endif /* CONFIG_PPC_WATCHDOG */
76118b1366SLaurent Dufour 
/*
 * Invoke a mobility-related RTAS function (ibm,update-nodes or
 * ibm,update-properties) through the globally shared rtas_data_buf.
 *
 * @token: RTAS token of the function to call
 * @buf:   caller's RTAS_DATA_BUF_SIZE work area; copied into the shared
 *         buffer before the call and copied back out afterward
 * @scope: scope argument passed through to the RTAS function
 *
 * Returns the RTAS call status (negative on error; 1 means more data
 * remains and the caller should call again with the updated buffer).
 */
static int mobility_rtas_call(int token, char *buf, s32 scope)
{
	int rc;

	/* rtas_data_buf is shared; the lock must cover both copies. */
	spin_lock(&rtas_data_buf_lock);

	memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE);
	rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope);
	memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE);

	spin_unlock(&rtas_data_buf_lock);
	return rc;
}
90410bccf9SNathan Fontenot 
912efd7f6eSNathan Lynch static int delete_dt_node(struct device_node *dn)
92410bccf9SNathan Fontenot {
93319fa1a5SNathan Lynch 	struct device_node *pdn;
94319fa1a5SNathan Lynch 	bool is_platfac;
95319fa1a5SNathan Lynch 
96319fa1a5SNathan Lynch 	pdn = of_get_parent(dn);
97319fa1a5SNathan Lynch 	is_platfac = of_node_is_type(dn, "ibm,platform-facilities") ||
98319fa1a5SNathan Lynch 		     of_node_is_type(pdn, "ibm,platform-facilities");
99319fa1a5SNathan Lynch 	of_node_put(pdn);
100319fa1a5SNathan Lynch 
101319fa1a5SNathan Lynch 	/*
102319fa1a5SNathan Lynch 	 * The drivers that bind to nodes in the platform-facilities
103319fa1a5SNathan Lynch 	 * hierarchy don't support node removal, and the removal directive
104319fa1a5SNathan Lynch 	 * from firmware is always followed by an add of an equivalent
105319fa1a5SNathan Lynch 	 * node. The capability (e.g. RNG, encryption, compression)
106319fa1a5SNathan Lynch 	 * represented by the node is never interrupted by the migration.
107319fa1a5SNathan Lynch 	 * So ignore changes to this part of the tree.
108319fa1a5SNathan Lynch 	 */
109319fa1a5SNathan Lynch 	if (is_platfac) {
110319fa1a5SNathan Lynch 		pr_notice("ignoring remove operation for %pOFfp\n", dn);
111319fa1a5SNathan Lynch 		return 0;
112319fa1a5SNathan Lynch 	}
113319fa1a5SNathan Lynch 
1145d8b1f9dSNathan Lynch 	pr_debug("removing node %pOFfp\n", dn);
115410bccf9SNathan Fontenot 	dlpar_detach_node(dn);
116410bccf9SNathan Fontenot 	return 0;
117410bccf9SNathan Fontenot }
118410bccf9SNathan Fontenot 
/*
 * Accumulate a (possibly multi-chunk) property value reported by
 * ibm,update-properties and, once complete, apply it to @dn.
 *
 * @dn:    node whose property is being updated
 * @prop:  in/out; carries a partially assembled property across calls.
 *         Reset to NULL once the completed property has been applied.
 * @name:  property name (used only when starting a new property)
 * @vd:    value descriptor: length of the data in @value; the high bit
 *         set means more data follows in a later RTAS call
 * @value: property data for this chunk
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int update_dt_property(struct device_node *dn, struct property **prop,
			      const char *name, u32 vd, char *value)
{
	struct property *new_prop = *prop;
	int more = 0;

	/* A negative 'vd' value indicates that only part of the new property
	 * value is contained in the buffer and we need to call
	 * ibm,update-properties again to get the rest of the value.
	 *
	 * A negative value is also the two's complement of the actual value.
	 */
	if (vd & 0x80000000) {
		vd = ~vd + 1;	/* recover the chunk length from the encoding */
		more = 1;
	}

	if (new_prop) {
		/* partial property fixup: grow the value by this chunk */
		char *new_data = kzalloc(new_prop->length + vd, GFP_KERNEL);
		if (!new_data)
			return -ENOMEM;

		memcpy(new_data, new_prop->value, new_prop->length);
		memcpy(new_data + new_prop->length, value, vd);

		kfree(new_prop->value);
		new_prop->value = new_data;
		new_prop->length += vd;
	} else {
		/* first (or only) chunk: allocate a fresh property */
		new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
		if (!new_prop)
			return -ENOMEM;

		new_prop->name = kstrdup(name, GFP_KERNEL);
		if (!new_prop->name) {
			kfree(new_prop);
			return -ENOMEM;
		}

		new_prop->length = vd;
		new_prop->value = kzalloc(new_prop->length, GFP_KERNEL);
		if (!new_prop->value) {
			kfree(new_prop->name);
			kfree(new_prop);
			return -ENOMEM;
		}

		memcpy(new_prop->value, value, vd);
		*prop = new_prop;
	}

	if (!more) {
		/* value complete: install it and drop the caller's reference */
		pr_debug("updating node %pOF property %s\n", dn, name);
		of_update_property(dn, new_prop);
		*prop = NULL;
	}

	return 0;
}
179410bccf9SNathan Fontenot 
/*
 * Handle an UPDATE_DT_NODE directive: call ibm,update-properties
 * (repeatedly, while it returns 1) for @dn and apply each reported
 * property add/update/delete to the live device tree.
 *
 * NOTE(review): rtas_rc < 0 breaks out of the loop but is not
 * propagated — this function always returns 0 to its caller (except
 * for the early -EINVAL/-ENOMEM paths).
 */
static int update_dt_node(struct device_node *dn, s32 scope)
{
	struct update_props_workarea *upwa;
	struct property *prop = NULL;
	int i, rc, rtas_rc;
	char *prop_data;
	char *rtas_buf;
	int update_properties_token;
	u32 nprops;
	u32 vd;

	update_properties_token = rtas_function_token(RTAS_FN_IBM_UPDATE_PROPERTIES);
	if (update_properties_token == RTAS_UNKNOWN_SERVICE)
		return -EINVAL;

	rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
	if (!rtas_buf)
		return -ENOMEM;

	/* The work area header leads the buffer; firmware keeps call
	 * state in it across iterations, so only set the phandle once.
	 */
	upwa = (struct update_props_workarea *)&rtas_buf[0];
	upwa->phandle = cpu_to_be32(dn->phandle);

	do {
		rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
					scope);
		if (rtas_rc < 0)
			break;

		prop_data = rtas_buf + sizeof(*upwa);
		nprops = be32_to_cpu(upwa->nprops);

		/* On the first call to ibm,update-properties for a node the
		 * first property value descriptor contains an empty
		 * property name, the property value length encoded as u32,
		 * and the property value is the node path being updated.
		 */
		if (*prop_data == 0) {
			prop_data++;
			vd = be32_to_cpu(*(__be32 *)prop_data);
			prop_data += vd + sizeof(vd);
			nprops--;
		}

		/* Each descriptor: NUL-terminated name, u32 value
		 * descriptor, then (for ordinary updates) the value data.
		 */
		for (i = 0; i < nprops; i++) {
			char *prop_name;

			prop_name = prop_data;
			prop_data += strlen(prop_name) + 1;
			vd = be32_to_cpu(*(__be32 *)prop_data);
			prop_data += sizeof(vd);

			switch (vd) {
			case 0x00000000:
				/* name only property, nothing to do */
				break;

			case 0x80000000:
				/* property deleted by firmware */
				of_remove_property(dn, of_find_property(dn,
							prop_name, NULL));
				prop = NULL;
				break;

			default:
				rc = update_dt_property(dn, &prop, prop_name,
							vd, prop_data);
				if (rc) {
					pr_err("updating %s property failed: %d\n",
					       prop_name, rc);
				}

				prop_data += vd;
				break;
			}

			cond_resched();
		}

		cond_resched();
	} while (rtas_rc == 1);	/* 1 == more property data pending */

	kfree(rtas_buf);
	return 0;
}
263410bccf9SNathan Fontenot 
/*
 * Handle an ADD_DT_NODE directive: configure the connector identified
 * by @drc_index and attach the resulting node under @parent_dn.
 *
 * Returns 0 on success (including the ignored platform-facilities
 * case), -ENOENT if the connector could not be configured, or the
 * error from dlpar_attach_node().
 *
 * NOTE(review): the "added node" pr_debug below is emitted even when
 * dlpar_attach_node() failed — harmless, but slightly misleading.
 */
static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
{
	struct device_node *dn;
	int rc;

	dn = dlpar_configure_connector(drc_index, parent_dn);
	if (!dn)
		return -ENOENT;

	/*
	 * Since delete_dt_node() ignores this node type, this is the
	 * necessary counterpart. We also know that a platform-facilities
	 * node returned from dlpar_configure_connector() has children
	 * attached, and dlpar_attach_node() only adds the parent, leaking
	 * the children. So ignore these on the add side for now.
	 */
	if (of_node_is_type(dn, "ibm,platform-facilities")) {
		pr_notice("ignoring add operation for %pOF\n", dn);
		dlpar_free_cc_nodes(dn);
		return 0;
	}

	rc = dlpar_attach_node(dn, parent_dn);
	if (rc)
		dlpar_free_cc_nodes(dn);

	pr_debug("added node %pOFfp\n", dn);

	return rc;
}
294410bccf9SNathan Fontenot 
/*
 * Drive ibm,update-nodes for the given @scope and dispatch each
 * reported delete/update/add directive to the corresponding handler.
 *
 * Returns 0 on success (or if ibm,update-nodes is not implemented),
 * -ENOMEM on allocation failure, or the final RTAS status.
 */
static int pseries_devicetree_update(s32 scope)
{
	char *rtas_buf;
	__be32 *data;
	int update_nodes_token;
	int rc;

	update_nodes_token = rtas_function_token(RTAS_FN_IBM_UPDATE_NODES);
	if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
		return 0;

	rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
	if (!rtas_buf)
		return -ENOMEM;

	do {
		rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope);
		if (rc && rc != 1)	/* 1 means more data pending */
			break;

		/* Skip the four-word (16-byte) work area header. */
		data = (__be32 *)rtas_buf + 4;
		/* Each entry word packs an action in the top byte and a
		 * node count in the low 24 bits; a zero action ends the
		 * list.
		 */
		while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
			int i;
			u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
			u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;

			data++;

			for (i = 0; i < node_count; i++) {
				struct device_node *np;
				__be32 phandle = *data++;
				__be32 drc_index;

				np = of_find_node_by_phandle(be32_to_cpu(phandle));
				if (!np) {
					pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n",
						be32_to_cpu(phandle), action);
					continue;
				}

				switch (action) {
				case DELETE_DT_NODE:
					delete_dt_node(np);
					break;
				case UPDATE_DT_NODE:
					update_dt_node(np, scope);
					break;
				case ADD_DT_NODE:
					/* adds carry an extra drc-index word */
					drc_index = *data++;
					add_dt_node(np, drc_index);
					break;
				}

				of_node_put(np);
				cond_resched();
			}
		}

		cond_resched();
	} while (rc == 1);

	kfree(rtas_buf);
	return rc;
}
359410bccf9SNathan Fontenot 
360410bccf9SNathan Fontenot void post_mobility_fixup(void)
361410bccf9SNathan Fontenot {
362410bccf9SNathan Fontenot 	int rc;
363410bccf9SNathan Fontenot 
364c3ae9781SNathan Lynch 	rtas_activate_firmware();
36539a33b59SHaren Myneni 
366e59a175fSNathan Lynch 	/*
367e59a175fSNathan Lynch 	 * We don't want CPUs to go online/offline while the device
368e59a175fSNathan Lynch 	 * tree is being updated.
369e59a175fSNathan Lynch 	 */
370e59a175fSNathan Lynch 	cpus_read_lock();
371e59a175fSNathan Lynch 
372e610a466SNathan Lynch 	/*
373e610a466SNathan Lynch 	 * It's common for the destination firmware to replace cache
374e610a466SNathan Lynch 	 * nodes.  Release all of the cacheinfo hierarchy's references
375e610a466SNathan Lynch 	 * before updating the device tree.
376e610a466SNathan Lynch 	 */
377e610a466SNathan Lynch 	cacheinfo_teardown();
378e610a466SNathan Lynch 
379762ec157SNathan Fontenot 	rc = pseries_devicetree_update(MIGRATION_SCOPE);
380410bccf9SNathan Fontenot 	if (rc)
3812d5be6f1SNathan Lynch 		pr_err("device tree update failed: %d\n", rc);
382410bccf9SNathan Fontenot 
383e610a466SNathan Lynch 	cacheinfo_rebuild();
384e610a466SNathan Lynch 
385e59a175fSNathan Lynch 	cpus_read_unlock();
386e59a175fSNathan Lynch 
387da631f7fSDaniel Axtens 	/* Possibly switch to a new L1 flush type */
388da631f7fSDaniel Axtens 	pseries_setup_security_mitigations();
389921bc6cfSMichael Ellerman 
390373b3730SKajol Jain 	/* Reinitialise system information for hv-24x7 */
391373b3730SKajol Jain 	read_24x7_sys_info();
392373b3730SKajol Jain 
393410bccf9SNathan Fontenot 	return;
394410bccf9SNathan Fontenot }
395410bccf9SNathan Fontenot 
396d9213319SNathan Lynch static int poll_vasi_state(u64 handle, unsigned long *res)
397d9213319SNathan Lynch {
398d9213319SNathan Lynch 	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
399d9213319SNathan Lynch 	long hvrc;
400d9213319SNathan Lynch 	int ret;
401d9213319SNathan Lynch 
402d9213319SNathan Lynch 	hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle);
403d9213319SNathan Lynch 	switch (hvrc) {
404d9213319SNathan Lynch 	case H_SUCCESS:
405d9213319SNathan Lynch 		ret = 0;
406d9213319SNathan Lynch 		*res = retbuf[0];
407d9213319SNathan Lynch 		break;
408d9213319SNathan Lynch 	case H_PARAMETER:
409d9213319SNathan Lynch 		ret = -EINVAL;
410d9213319SNathan Lynch 		break;
411d9213319SNathan Lynch 	case H_FUNCTION:
412d9213319SNathan Lynch 		ret = -EOPNOTSUPP;
413d9213319SNathan Lynch 		break;
414d9213319SNathan Lynch 	case H_HARDWARE:
415d9213319SNathan Lynch 	default:
416d9213319SNathan Lynch 		pr_err("unexpected H_VASI_STATE result %ld\n", hvrc);
417d9213319SNathan Lynch 		ret = -EIO;
418d9213319SNathan Lynch 		break;
419d9213319SNathan Lynch 	}
420d9213319SNathan Lynch 	return ret;
421d9213319SNathan Lynch }
422d9213319SNathan Lynch 
423d9213319SNathan Lynch static int wait_for_vasi_session_suspending(u64 handle)
424d9213319SNathan Lynch {
425d9213319SNathan Lynch 	unsigned long state;
426d9213319SNathan Lynch 	int ret;
427d9213319SNathan Lynch 
428d9213319SNathan Lynch 	/*
429d9213319SNathan Lynch 	 * Wait for transition from H_VASI_ENABLED to
430d9213319SNathan Lynch 	 * H_VASI_SUSPENDING. Treat anything else as an error.
431d9213319SNathan Lynch 	 */
432d9213319SNathan Lynch 	while (true) {
433d9213319SNathan Lynch 		ret = poll_vasi_state(handle, &state);
434d9213319SNathan Lynch 
435d9213319SNathan Lynch 		if (ret != 0 || state == H_VASI_SUSPENDING) {
436d9213319SNathan Lynch 			break;
437d9213319SNathan Lynch 		} else if (state == H_VASI_ENABLED) {
438d9213319SNathan Lynch 			ssleep(1);
439d9213319SNathan Lynch 		} else {
440d9213319SNathan Lynch 			pr_err("unexpected H_VASI_STATE result %lu\n", state);
441d9213319SNathan Lynch 			ret = -EIO;
442d9213319SNathan Lynch 			break;
443d9213319SNathan Lynch 		}
444d9213319SNathan Lynch 	}
445d9213319SNathan Lynch 
446d9213319SNathan Lynch 	/*
447d9213319SNathan Lynch 	 * Proceed even if H_VASI_STATE is unavailable. If H_JOIN or
448d9213319SNathan Lynch 	 * ibm,suspend-me are also unimplemented, we'll recover then.
449d9213319SNathan Lynch 	 */
450d9213319SNathan Lynch 	if (ret == -EOPNOTSUPP)
451d9213319SNathan Lynch 		ret = 0;
452d9213319SNathan Lynch 
453d9213319SNathan Lynch 	return ret;
454d9213319SNathan Lynch }
455d9213319SNathan Lynch 
/*
 * Block until the VASI stream reports that the post-resume memory
 * transfer has finished (H_VASI_RESUMED -> H_VASI_COMPLETED), polling
 * every 500ms. Errors and unexpected states end the wait; this
 * function is best-effort and returns nothing.
 */
static void wait_for_vasi_session_completed(u64 handle)
{
	unsigned long state = 0;
	int ret;

	pr_info("waiting for memory transfer to complete...\n");

	/*
	 * Wait for transition from H_VASI_RESUMED to H_VASI_COMPLETED.
	 */
	while (true) {
		ret = poll_vasi_state(handle, &state);

		/*
		 * If the memory transfer is already complete and the
		 * migration has been cleaned up by the hypervisor,
		 * H_PARAMETER is returned, which poll_vasi_state()
		 * translates to -EINVAL.
		 */
		if (ret == -EINVAL || (!ret && state == H_VASI_COMPLETED)) {
			pr_info("memory transfer completed.\n");
			break;
		}

		if (ret) {
			pr_err("H_VASI_STATE return error (%d)\n", ret);
			break;
		}

		if (state != H_VASI_RESUMED) {
			pr_err("unexpected H_VASI_STATE result %lu\n", state);
			break;
		}

		msleep(500);
	}
}
492882c0d17SLaurent Dufour 
4939327dc0aSNathan Lynch static void prod_single(unsigned int target_cpu)
4949327dc0aSNathan Lynch {
4959327dc0aSNathan Lynch 	long hvrc;
4969327dc0aSNathan Lynch 	int hwid;
4979327dc0aSNathan Lynch 
4989327dc0aSNathan Lynch 	hwid = get_hard_smp_processor_id(target_cpu);
4999327dc0aSNathan Lynch 	hvrc = plpar_hcall_norets(H_PROD, hwid);
5009327dc0aSNathan Lynch 	if (hvrc == H_SUCCESS)
5019327dc0aSNathan Lynch 		return;
5029327dc0aSNathan Lynch 	pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n",
5039327dc0aSNathan Lynch 			   target_cpu, hwid, hvrc);
5049327dc0aSNathan Lynch }
5059327dc0aSNathan Lynch 
5069327dc0aSNathan Lynch static void prod_others(void)
5079327dc0aSNathan Lynch {
5089327dc0aSNathan Lynch 	unsigned int cpu;
5099327dc0aSNathan Lynch 
5109327dc0aSNathan Lynch 	for_each_online_cpu(cpu) {
5119327dc0aSNathan Lynch 		if (cpu != smp_processor_id())
5129327dc0aSNathan Lynch 			prod_single(cpu);
5139327dc0aSNathan Lynch 	}
5149327dc0aSNathan Lynch }
5159327dc0aSNathan Lynch 
/*
 * Shrink the SLB to SLB_MIN_SIZE ahead of suspend and return the
 * previous size so it can be restored on failure. Returns 0 (and does
 * nothing) when the hash MMU is not built in.
 */
static u16 clamp_slb_size(void)
{
#ifdef CONFIG_PPC_64S_HASH_MMU
	u16 prev = mmu_slb_size;

	slb_set_size(SLB_MIN_SIZE);

	return prev;
#else
	return 0;
#endif
}
5289327dc0aSNathan Lynch 
/*
 * Issue ibm,suspend-me on the current CPU. Called from do_join() on
 * the one CPU that received H_CONTINUE, i.e. when all other CPUs are
 * offline or held in H_JOIN.
 *
 * Returns the result of rtas_ibm_suspend_me(); on failure the SLB
 * size reduced by clamp_slb_size() is restored before returning.
 */
static int do_suspend(void)
{
	u16 saved_slb_size;
	int status;
	int ret;

	pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id());

	/*
	 * The destination processor model may have fewer SLB entries
	 * than the source. We reduce mmu_slb_size to a safe minimum
	 * before suspending in order to minimize the possibility of
	 * programming non-existent entries on the destination. If
	 * suspend fails, we restore it before returning. On success
	 * the OF reconfig path will update it from the new device
	 * tree after resuming on the destination.
	 */
	saved_slb_size = clamp_slb_size();

	ret = rtas_ibm_suspend_me(&status);
	if (ret != 0) {
		pr_err("ibm,suspend-me error: %d\n", status);
		slb_set_size(saved_slb_size);
	}

	return ret;
}
5569327dc0aSNathan Lynch 
/**
 * struct pseries_suspend_info - State shared between CPUs for join/suspend.
 * @counter: Threads are to increment this upon resuming from suspend
 *           or if an error is received from H_JOIN. The thread which performs
 *           the first increment (i.e. sets it to 1) is responsible for
 *           waking the other threads.
 * @done: False if join/suspend is in progress. True if the operation is
 *        complete (successful or not).
 *
 * A zero-initialized instance is passed to do_join() on every CPU via
 * stop_machine(); see pseries_suspend().
 */
struct pseries_suspend_info {
	atomic_t counter;
	bool done;
};
570e834df6cSNathan Lynch 
/*
 * stop_machine() callback for the join/suspend sequence, run on every
 * online CPU. Each CPU calls H_JOIN with interrupts hard-disabled; the
 * CPU that receives H_CONTINUE performs the suspend via do_suspend().
 * After resume, the first CPU to increment @info->counter sets
 * info->done and wakes the others with H_PROD. A CPU woken by a stray
 * prod (info->done still false) re-enters H_JOIN.
 *
 * @arg: struct pseries_suspend_info shared by all CPUs.
 * Returns 0 on success, the do_suspend() result on the suspending CPU,
 * or -EIO on H_JOIN failure.
 */
static int do_join(void *arg)
{
	struct pseries_suspend_info *info = arg;
	atomic_t *counter = &info->counter;
	long hvrc;
	int ret;

retry:
	/* Must ensure MSR.EE off for H_JOIN. */
	hard_irq_disable();
	hvrc = plpar_hcall_norets(H_JOIN);

	switch (hvrc) {
	case H_CONTINUE:
		/*
		 * All other CPUs are offline or in H_JOIN. This CPU
		 * attempts the suspend.
		 */
		ret = do_suspend();
		break;
	case H_SUCCESS:
		/*
		 * The suspend is complete and this cpu has received a
		 * prod, or we've received a stray prod from unrelated
		 * code (e.g. paravirt spinlocks) and we need to join
		 * again.
		 *
		 * This barrier orders the return from H_JOIN above vs
		 * the load of info->done. It pairs with the barrier
		 * in the wakeup/prod path below.
		 */
		smp_mb();
		if (READ_ONCE(info->done) == false) {
			pr_info_ratelimited("premature return from H_JOIN on CPU %i, retrying",
					    smp_processor_id());
			goto retry;
		}
		ret = 0;
		break;
	case H_BAD_MODE:
	case H_HARDWARE:
	default:
		ret = -EIO;
		pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
				   hvrc, smp_processor_id());
		break;
	}

	if (atomic_inc_return(counter) == 1) {
		pr_info("CPU %u waking all threads\n", smp_processor_id());
		WRITE_ONCE(info->done, true);
		/*
		 * This barrier orders the store to info->done vs subsequent
		 * H_PRODs to wake the other CPUs. It pairs with the barrier
		 * in the H_SUCCESS case above.
		 */
		smp_mb();
		prod_others();
	}
	/*
	 * Execution may have been suspended for several seconds, so reset
	 * the watchdogs. touch_nmi_watchdog() also touches the soft lockup
	 * watchdog.
	 */
	rcu_cpu_stall_reset();
	touch_nmi_watchdog();

	return ret;
}
6409327dc0aSNathan Lynch 
/*
 * Abort reason code byte 0. We use only the 'Migrating partition' value.
 * The chosen value is shifted into the top byte of the 32-bit reason
 * code handed to H_VASI_SIGNAL — see pseries_cancel_migration().
 */
enum vasi_aborting_entity {
	ORCHESTRATOR        = 1,
	VSP_SOURCE          = 2,
	PARTITION_FIRMWARE  = 3,
	PLATFORM_FIRMWARE   = 4,
	VSP_TARGET          = 5,
	MIGRATING_PARTITION = 6,
};
65237cddc7dSNathan Lynch 
65337cddc7dSNathan Lynch static void pseries_cancel_migration(u64 handle, int err)
65437cddc7dSNathan Lynch {
65537cddc7dSNathan Lynch 	u32 reason_code;
65637cddc7dSNathan Lynch 	u32 detail;
65737cddc7dSNathan Lynch 	u8 entity;
65837cddc7dSNathan Lynch 	long hvrc;
65937cddc7dSNathan Lynch 
66037cddc7dSNathan Lynch 	entity = MIGRATING_PARTITION;
66137cddc7dSNathan Lynch 	detail = abs(err) & 0xffffff;
66237cddc7dSNathan Lynch 	reason_code = (entity << 24) | detail;
66337cddc7dSNathan Lynch 
66437cddc7dSNathan Lynch 	hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle,
66537cddc7dSNathan Lynch 				  H_VASI_SIGNAL_CANCEL, reason_code);
66637cddc7dSNathan Lynch 	if (hvrc)
66737cddc7dSNathan Lynch 		pr_err("H_VASI_SIGNAL error: %ld\n", hvrc);
66837cddc7dSNathan Lynch }
66937cddc7dSNathan Lynch 
670aeca35b9SNathan Lynch static int pseries_suspend(u64 handle)
671aeca35b9SNathan Lynch {
672aeca35b9SNathan Lynch 	const unsigned int max_attempts = 5;
673aeca35b9SNathan Lynch 	unsigned int retry_interval_ms = 1;
674aeca35b9SNathan Lynch 	unsigned int attempt = 1;
675aeca35b9SNathan Lynch 	int ret;
676aeca35b9SNathan Lynch 
677aeca35b9SNathan Lynch 	while (true) {
678e834df6cSNathan Lynch 		struct pseries_suspend_info info;
679aeca35b9SNathan Lynch 		unsigned long vasi_state;
680aeca35b9SNathan Lynch 		int vasi_err;
681aeca35b9SNathan Lynch 
682e834df6cSNathan Lynch 		info = (struct pseries_suspend_info) {
683e834df6cSNathan Lynch 			.counter = ATOMIC_INIT(0),
684274cb1caSNathan Lynch 			.done = false,
685e834df6cSNathan Lynch 		};
686e834df6cSNathan Lynch 
687e834df6cSNathan Lynch 		ret = stop_machine(do_join, &info, cpu_online_mask);
688aeca35b9SNathan Lynch 		if (ret == 0)
689aeca35b9SNathan Lynch 			break;
690aeca35b9SNathan Lynch 		/*
691aeca35b9SNathan Lynch 		 * Encountered an error. If the VASI stream is still
692aeca35b9SNathan Lynch 		 * in Suspending state, it's likely a transient
693aeca35b9SNathan Lynch 		 * condition related to some device in the partition
694aeca35b9SNathan Lynch 		 * and we can retry in the hope that the cause has
695aeca35b9SNathan Lynch 		 * cleared after some delay.
696aeca35b9SNathan Lynch 		 *
697aeca35b9SNathan Lynch 		 * A better design would allow drivers etc to prepare
698aeca35b9SNathan Lynch 		 * for the suspend and avoid conditions which prevent
699aeca35b9SNathan Lynch 		 * the suspend from succeeding. For now, we have this
700aeca35b9SNathan Lynch 		 * mitigation.
701aeca35b9SNathan Lynch 		 */
702aeca35b9SNathan Lynch 		pr_notice("Partition suspend attempt %u of %u error: %d\n",
703aeca35b9SNathan Lynch 			  attempt, max_attempts, ret);
704aeca35b9SNathan Lynch 
705aeca35b9SNathan Lynch 		if (attempt == max_attempts)
706aeca35b9SNathan Lynch 			break;
707aeca35b9SNathan Lynch 
708aeca35b9SNathan Lynch 		vasi_err = poll_vasi_state(handle, &vasi_state);
709aeca35b9SNathan Lynch 		if (vasi_err == 0) {
710aeca35b9SNathan Lynch 			if (vasi_state != H_VASI_SUSPENDING) {
711aeca35b9SNathan Lynch 				pr_notice("VASI state %lu after failed suspend\n",
712aeca35b9SNathan Lynch 					  vasi_state);
713aeca35b9SNathan Lynch 				break;
714aeca35b9SNathan Lynch 			}
715aeca35b9SNathan Lynch 		} else if (vasi_err != -EOPNOTSUPP) {
716aeca35b9SNathan Lynch 			pr_err("VASI state poll error: %d", vasi_err);
717aeca35b9SNathan Lynch 			break;
718aeca35b9SNathan Lynch 		}
719aeca35b9SNathan Lynch 
720aeca35b9SNathan Lynch 		pr_notice("Will retry partition suspend after %u ms\n",
721aeca35b9SNathan Lynch 			  retry_interval_ms);
722aeca35b9SNathan Lynch 
723aeca35b9SNathan Lynch 		msleep(retry_interval_ms);
724aeca35b9SNathan Lynch 		retry_interval_ms *= 10;
725aeca35b9SNathan Lynch 		attempt++;
726aeca35b9SNathan Lynch 	}
727aeca35b9SNathan Lynch 
728aeca35b9SNathan Lynch 	return ret;
729aeca35b9SNathan Lynch }
730aeca35b9SNathan Lynch 
7319327dc0aSNathan Lynch static int pseries_migrate_partition(u64 handle)
7329327dc0aSNathan Lynch {
7339327dc0aSNathan Lynch 	int ret;
734118b1366SLaurent Dufour 	unsigned int factor = 0;
7359327dc0aSNathan Lynch 
736118b1366SLaurent Dufour #ifdef CONFIG_PPC_WATCHDOG
737118b1366SLaurent Dufour 	factor = nmi_wd_lpm_factor;
738118b1366SLaurent Dufour #endif
739465dda9dSHaren Myneni 	/*
740465dda9dSHaren Myneni 	 * When the migration is initiated, the hypervisor changes VAS
741465dda9dSHaren Myneni 	 * mappings to prepare before OS gets the notification and
742465dda9dSHaren Myneni 	 * closes all VAS windows. NX generates continuous faults during
743465dda9dSHaren Myneni 	 * this time and the user space can not differentiate these
744465dda9dSHaren Myneni 	 * faults from the migration event. So reduce this time window
745465dda9dSHaren Myneni 	 * by closing VAS windows at the beginning of this function.
746465dda9dSHaren Myneni 	 */
747465dda9dSHaren Myneni 	vas_migration_handler(VAS_SUSPEND);
748465dda9dSHaren Myneni 
7499327dc0aSNathan Lynch 	ret = wait_for_vasi_session_suspending(handle);
7509327dc0aSNathan Lynch 	if (ret)
751465dda9dSHaren Myneni 		goto out;
75237e67648SHaren Myneni 
753118b1366SLaurent Dufour 	if (factor)
754df95d308SDouglas Anderson 		watchdog_hardlockup_set_timeout_pct(factor);
755118b1366SLaurent Dufour 
756aeca35b9SNathan Lynch 	ret = pseries_suspend(handle);
757882c0d17SLaurent Dufour 	if (ret == 0) {
7589327dc0aSNathan Lynch 		post_mobility_fixup();
759882c0d17SLaurent Dufour 		/*
760882c0d17SLaurent Dufour 		 * Wait until the memory transfer is complete, so that the user
761882c0d17SLaurent Dufour 		 * space process returns from the syscall after the transfer is
762882c0d17SLaurent Dufour 		 * complete. This allows the user hooks to be executed at the
763882c0d17SLaurent Dufour 		 * right time.
764882c0d17SLaurent Dufour 		 */
765882c0d17SLaurent Dufour 		wait_for_vasi_session_completed(handle);
766882c0d17SLaurent Dufour 	} else
76737cddc7dSNathan Lynch 		pseries_cancel_migration(handle, ret);
7689327dc0aSNathan Lynch 
769118b1366SLaurent Dufour 	if (factor)
770df95d308SDouglas Anderson 		watchdog_hardlockup_set_timeout_pct(0);
771118b1366SLaurent Dufour 
772465dda9dSHaren Myneni out:
77337e67648SHaren Myneni 	vas_migration_handler(VAS_RESUME);
77437e67648SHaren Myneni 
7759327dc0aSNathan Lynch 	return ret;
7769327dc0aSNathan Lynch }
7779327dc0aSNathan Lynch 
7784d756894SNathan Lynch int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
7794d756894SNathan Lynch {
7804d756894SNathan Lynch 	return pseries_migrate_partition(handle);
7814d756894SNathan Lynch }
7824d756894SNathan Lynch 
78375a2d422SGreg Kroah-Hartman static ssize_t migration_store(const struct class *class,
78475a2d422SGreg Kroah-Hartman 			       const struct class_attribute *attr, const char *buf,
7856f428096SGreg Kroah-Hartman 			       size_t count)
786410bccf9SNathan Fontenot {
787410bccf9SNathan Fontenot 	u64 streamid;
788410bccf9SNathan Fontenot 	int rc;
789410bccf9SNathan Fontenot 
7901618bd53SDaniel Walter 	rc = kstrtou64(buf, 0, &streamid);
791410bccf9SNathan Fontenot 	if (rc)
792410bccf9SNathan Fontenot 		return rc;
793410bccf9SNathan Fontenot 
7949327dc0aSNathan Lynch 	rc = pseries_migrate_partition(streamid);
795d9213319SNathan Lynch 	if (rc)
796d9213319SNathan Lynch 		return rc;
797410bccf9SNathan Fontenot 
798410bccf9SNathan Fontenot 	return count;
799410bccf9SNathan Fontenot }
800410bccf9SNathan Fontenot 
801288a298cSTyrel Datwyler /*
802288a298cSTyrel Datwyler  * Used by drmgr to determine the kernel behavior of the migration interface.
803288a298cSTyrel Datwyler  *
804288a298cSTyrel Datwyler  * Version 1: Performs all PAPR requirements for migration including
805288a298cSTyrel Datwyler  *	firmware activation and device tree update.
806288a298cSTyrel Datwyler  */
807288a298cSTyrel Datwyler #define MIGRATION_API_VERSION	1
808288a298cSTyrel Datwyler 
8096f428096SGreg Kroah-Hartman static CLASS_ATTR_WO(migration);
81057ad583fSRussell Currey static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION));
811410bccf9SNathan Fontenot 
812410bccf9SNathan Fontenot static int __init mobility_sysfs_init(void)
813410bccf9SNathan Fontenot {
814410bccf9SNathan Fontenot 	int rc;
815410bccf9SNathan Fontenot 
816410bccf9SNathan Fontenot 	mobility_kobj = kobject_create_and_add("mobility", kernel_kobj);
817410bccf9SNathan Fontenot 	if (!mobility_kobj)
818410bccf9SNathan Fontenot 		return -ENOMEM;
819410bccf9SNathan Fontenot 
820410bccf9SNathan Fontenot 	rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr);
821288a298cSTyrel Datwyler 	if (rc)
822494a66f3SNathan Lynch 		pr_err("unable to create migration sysfs file (%d)\n", rc);
823410bccf9SNathan Fontenot 
824288a298cSTyrel Datwyler 	rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr);
825288a298cSTyrel Datwyler 	if (rc)
826494a66f3SNathan Lynch 		pr_err("unable to create api_version sysfs file (%d)\n", rc);
827288a298cSTyrel Datwyler 
828288a298cSTyrel Datwyler 	return 0;
829410bccf9SNathan Fontenot }
8308e83e905SMichael Ellerman machine_device_initcall(pseries, mobility_sysfs_init);
831