xref: /linux/drivers/misc/sgi-xp/xpc_uv.c (revision 1dd419145d090f8fdf149cbb39dea6d968659dd2)
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8 
9 /*
10  * Cross Partition Communication (XPC) uv-based functions.
11  *
12  *     Architecture specific implementation of common functions.
13  *
14  */
15 
16 #include <linux/kernel.h>
17 #include <linux/mm.h>
18 #include <linux/interrupt.h>
19 #include <linux/delay.h>
20 #include <linux/device.h>
21 #include <linux/cpu.h>
22 #include <linux/module.h>
23 #include <linux/err.h>
24 #include <linux/slab.h>
25 #include <linux/numa.h>
26 #include <asm/uv/uv_hub.h>
27 #include <asm/uv/bios.h>
28 #include <asm/uv/uv_irq.h>
29 #include "../sgi-gru/gru.h"
30 #include "../sgi-gru/grukservices.h"
31 #include "xpc.h"
32 
33 static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
34 
35 #define XPC_ACTIVATE_MSG_SIZE_UV	(1 * GRU_CACHE_LINE_BYTES)
36 #define XPC_ACTIVATE_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
37 					 XPC_ACTIVATE_MSG_SIZE_UV)
38 #define XPC_ACTIVATE_IRQ_NAME		"xpc_activate"
39 
40 #define XPC_NOTIFY_MSG_SIZE_UV		(2 * GRU_CACHE_LINE_BYTES)
41 #define XPC_NOTIFY_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
42 					 XPC_NOTIFY_MSG_SIZE_UV)
43 #define XPC_NOTIFY_IRQ_NAME		"xpc_notify"
44 
45 static int xpc_mq_node = NUMA_NO_NODE;
46 
47 static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
48 static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
49 
50 static int
51 xpc_setup_partitions_uv(void)
52 {
53 	short partid;
54 	struct xpc_partition_uv *part_uv;
55 
56 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
57 		part_uv = &xpc_partitions[partid].sn.uv;
58 
59 		mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex);
60 		spin_lock_init(&part_uv->flags_lock);
61 		part_uv->remote_act_state = XPC_P_AS_INACTIVE;
62 	}
63 	return 0;
64 }
65 
66 static void
67 xpc_teardown_partitions_uv(void)
68 {
69 	short partid;
70 	struct xpc_partition_uv *part_uv;
71 	unsigned long irq_flags;
72 
73 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
74 		part_uv = &xpc_partitions[partid].sn.uv;
75 
76 		if (part_uv->cached_activate_gru_mq_desc != NULL) {
77 			mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
78 			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
79 			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
80 			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
81 			kfree(part_uv->cached_activate_gru_mq_desc);
82 			part_uv->cached_activate_gru_mq_desc = NULL;
83 			mutex_unlock(&part_uv->
84 				     cached_activate_gru_mq_desc_mutex);
85 		}
86 	}
87 }
88 
89 static int
90 xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
91 {
92 	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
93 
94 	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
95 			UV_AFFINITY_CPU);
96 	if (mq->irq < 0)
97 		return mq->irq;
98 
99 	mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
100 
101 	return 0;
102 }
103 
/*
 * Release the irq recorded in mq->irq by handing it to uv_teardown_irq().
 */
static void
xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
{
	uv_teardown_irq(mq->irq);
}
109 
110 static int
111 xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
112 {
113 	int ret;
114 
115 	ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address),
116 					 mq->order, &mq->mmr_offset);
117 	if (ret < 0) {
118 		dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
119 			"ret=%d\n", ret);
120 		return ret;
121 	}
122 
123 	mq->watchlist_num = ret;
124 	return 0;
125 }
126 
127 static void
128 xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
129 {
130 	int ret;
131 	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
132 
133 	ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
134 	BUG_ON(ret != BIOS_STATUS_SUCCESS);
135 }
136 
137 static struct xpc_gru_mq_uv *
138 xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
139 		     irq_handler_t irq_handler)
140 {
141 	enum xp_retval xp_ret;
142 	int ret;
143 	int nid;
144 	int nasid;
145 	int pg_order;
146 	struct page *page;
147 	struct xpc_gru_mq_uv *mq;
148 	struct uv_IO_APIC_route_entry *mmr_value;
149 
150 	mq = kmalloc_obj(struct xpc_gru_mq_uv);
151 	if (mq == NULL) {
152 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
153 			"a xpc_gru_mq_uv structure\n");
154 		ret = -ENOMEM;
155 		goto out_0;
156 	}
157 
158 	mq->gru_mq_desc = kzalloc_obj(struct gru_message_queue_desc);
159 	if (mq->gru_mq_desc == NULL) {
160 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
161 			"a gru_message_queue_desc structure\n");
162 		ret = -ENOMEM;
163 		goto out_1;
164 	}
165 
166 	pg_order = get_order(mq_size);
167 	mq->order = pg_order + PAGE_SHIFT;
168 	mq_size = 1UL << mq->order;
169 
170 	mq->mmr_blade = uv_cpu_to_blade_id(cpu);
171 
172 	nid = cpu_to_node(cpu);
173 	page = __alloc_pages_node(nid,
174 				      GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
175 				      pg_order);
176 	if (page == NULL) {
177 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
178 			"bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
179 		ret = -ENOMEM;
180 		goto out_2;
181 	}
182 	mq->address = page_address(page);
183 
184 	/* enable generation of irq when GRU mq operation occurs to this mq */
185 	ret = xpc_gru_mq_watchlist_alloc_uv(mq);
186 	if (ret != 0)
187 		goto out_3;
188 
189 	ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
190 	if (ret != 0)
191 		goto out_4;
192 
193 	ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
194 	if (ret != 0) {
195 		dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
196 			mq->irq, -ret);
197 		goto out_5;
198 	}
199 
200 	nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu));
201 
202 	mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value;
203 	ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size,
204 				     nasid, mmr_value->vector, mmr_value->dest);
205 	if (ret != 0) {
206 		dev_err(xpc_part, "gru_create_message_queue() returned "
207 			"error=%d\n", ret);
208 		ret = -EINVAL;
209 		goto out_6;
210 	}
211 
212 	/* allow other partitions to access this GRU mq */
213 	xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
214 	if (xp_ret != xpSuccess) {
215 		ret = -EACCES;
216 		goto out_6;
217 	}
218 
219 	return mq;
220 
221 	/* something went wrong */
222 out_6:
223 	free_irq(mq->irq, NULL);
224 out_5:
225 	xpc_release_gru_mq_irq_uv(mq);
226 out_4:
227 	xpc_gru_mq_watchlist_free_uv(mq);
228 out_3:
229 	free_pages((unsigned long)mq->address, pg_order);
230 out_2:
231 	kfree(mq->gru_mq_desc);
232 out_1:
233 	kfree(mq);
234 out_0:
235 	return ERR_PTR(ret);
236 }
237 
238 static void
239 xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
240 {
241 	unsigned int mq_size;
242 	int pg_order;
243 	int ret;
244 
245 	/* disallow other partitions to access GRU mq */
246 	mq_size = 1UL << mq->order;
247 	ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
248 	BUG_ON(ret != xpSuccess);
249 
250 	/* unregister irq handler and release mq irq/vector mapping */
251 	free_irq(mq->irq, NULL);
252 	xpc_release_gru_mq_irq_uv(mq);
253 
254 	/* disable generation of irq when GRU mq op occurs to this mq */
255 	xpc_gru_mq_watchlist_free_uv(mq);
256 
257 	pg_order = mq->order - PAGE_SHIFT;
258 	free_pages((unsigned long)mq->address, pg_order);
259 
260 	kfree(mq);
261 }
262 
263 static enum xp_retval
264 xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg,
265 		 size_t msg_size)
266 {
267 	enum xp_retval xp_ret;
268 	int ret;
269 
270 	while (1) {
271 		ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size);
272 		if (ret == MQE_OK) {
273 			xp_ret = xpSuccess;
274 			break;
275 		}
276 
277 		if (ret == MQE_QUEUE_FULL) {
278 			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
279 				"error=MQE_QUEUE_FULL\n");
280 			/* !!! handle QLimit reached; delay & try again */
281 			/* ??? Do we add a limit to the number of retries? */
282 			(void)msleep_interruptible(10);
283 		} else if (ret == MQE_CONGESTION) {
284 			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
285 				"error=MQE_CONGESTION\n");
286 			/* !!! handle LB Overflow; simply try again */
287 			/* ??? Do we add a limit to the number of retries? */
288 		} else {
289 			/* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
290 			dev_err(xpc_chan, "gru_send_message_gpa() returned "
291 				"error=%d\n", ret);
292 			xp_ret = xpGruSendMqError;
293 			break;
294 		}
295 	}
296 	return xp_ret;
297 }
298 
299 static void
300 xpc_process_activate_IRQ_rcvd_uv(void)
301 {
302 	unsigned long irq_flags;
303 	short partid;
304 	struct xpc_partition *part;
305 	u8 act_state_req;
306 
307 	DBUG_ON(xpc_activate_IRQ_rcvd == 0);
308 
309 	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
310 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
311 		part = &xpc_partitions[partid];
312 
313 		if (part->sn.uv.act_state_req == 0)
314 			continue;
315 
316 		xpc_activate_IRQ_rcvd--;
317 		BUG_ON(xpc_activate_IRQ_rcvd < 0);
318 
319 		act_state_req = part->sn.uv.act_state_req;
320 		part->sn.uv.act_state_req = 0;
321 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
322 
323 		if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
324 			if (part->act_state == XPC_P_AS_INACTIVE)
325 				xpc_activate_partition(part);
326 			else if (part->act_state == XPC_P_AS_DEACTIVATING)
327 				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
328 
329 		} else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
330 			if (part->act_state == XPC_P_AS_INACTIVE)
331 				xpc_activate_partition(part);
332 			else
333 				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
334 
335 		} else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
336 			XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
337 
338 		} else {
339 			BUG();
340 		}
341 
342 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
343 		if (xpc_activate_IRQ_rcvd == 0)
344 			break;
345 	}
346 	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
347 
348 }
349 
350 static void
351 xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
352 			      struct xpc_activate_mq_msghdr_uv *msg_hdr,
353 			      int part_setup,
354 			      int *wakeup_hb_checker)
355 {
356 	unsigned long irq_flags;
357 	struct xpc_partition_uv *part_uv = &part->sn.uv;
358 	struct xpc_openclose_args *args;
359 
360 	part_uv->remote_act_state = msg_hdr->act_state;
361 
362 	switch (msg_hdr->type) {
363 	case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
364 		/* syncing of remote_act_state was just done above */
365 		break;
366 
367 	case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
368 		struct xpc_activate_mq_msg_activate_req_uv *msg;
369 
370 		/*
371 		 * ??? Do we deal here with ts_jiffies being different
372 		 * ??? if act_state != XPC_P_AS_INACTIVE instead of
373 		 * ??? below?
374 		 */
375 		msg = container_of(msg_hdr, struct
376 				   xpc_activate_mq_msg_activate_req_uv, hdr);
377 
378 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
379 		if (part_uv->act_state_req == 0)
380 			xpc_activate_IRQ_rcvd++;
381 		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
382 		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
383 		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
384 		part_uv->heartbeat_gpa = msg->heartbeat_gpa;
385 
386 		if (msg->activate_gru_mq_desc_gpa !=
387 		    part_uv->activate_gru_mq_desc_gpa) {
388 			spin_lock(&part_uv->flags_lock);
389 			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
390 			spin_unlock(&part_uv->flags_lock);
391 			part_uv->activate_gru_mq_desc_gpa =
392 			    msg->activate_gru_mq_desc_gpa;
393 		}
394 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
395 
396 		(*wakeup_hb_checker)++;
397 		break;
398 	}
399 	case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
400 		struct xpc_activate_mq_msg_deactivate_req_uv *msg;
401 
402 		msg = container_of(msg_hdr, struct
403 				   xpc_activate_mq_msg_deactivate_req_uv, hdr);
404 
405 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
406 		if (part_uv->act_state_req == 0)
407 			xpc_activate_IRQ_rcvd++;
408 		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
409 		part_uv->reason = msg->reason;
410 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
411 
412 		(*wakeup_hb_checker)++;
413 		return;
414 	}
415 	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
416 		struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
417 
418 		if (!part_setup)
419 			break;
420 
421 		msg = container_of(msg_hdr, struct
422 				   xpc_activate_mq_msg_chctl_closerequest_uv,
423 				   hdr);
424 		args = &part->remote_openclose_args[msg->ch_number];
425 		args->reason = msg->reason;
426 
427 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
428 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
429 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
430 
431 		xpc_wakeup_channel_mgr(part);
432 		break;
433 	}
434 	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
435 		struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
436 
437 		if (!part_setup)
438 			break;
439 
440 		msg = container_of(msg_hdr, struct
441 				   xpc_activate_mq_msg_chctl_closereply_uv,
442 				   hdr);
443 
444 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
445 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
446 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
447 
448 		xpc_wakeup_channel_mgr(part);
449 		break;
450 	}
451 	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
452 		struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
453 
454 		if (!part_setup)
455 			break;
456 
457 		msg = container_of(msg_hdr, struct
458 				   xpc_activate_mq_msg_chctl_openrequest_uv,
459 				   hdr);
460 		args = &part->remote_openclose_args[msg->ch_number];
461 		args->entry_size = msg->entry_size;
462 		args->local_nentries = msg->local_nentries;
463 
464 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
465 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
466 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
467 
468 		xpc_wakeup_channel_mgr(part);
469 		break;
470 	}
471 	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
472 		struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
473 
474 		if (!part_setup)
475 			break;
476 
477 		msg = container_of(msg_hdr, struct
478 				   xpc_activate_mq_msg_chctl_openreply_uv, hdr);
479 		args = &part->remote_openclose_args[msg->ch_number];
480 		args->remote_nentries = msg->remote_nentries;
481 		args->local_nentries = msg->local_nentries;
482 		args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa;
483 
484 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
485 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
486 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
487 
488 		xpc_wakeup_channel_mgr(part);
489 		break;
490 	}
491 	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: {
492 		struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg;
493 
494 		if (!part_setup)
495 			break;
496 
497 		msg = container_of(msg_hdr, struct
498 				xpc_activate_mq_msg_chctl_opencomplete_uv, hdr);
499 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
500 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE;
501 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
502 
503 		xpc_wakeup_channel_mgr(part);
504 	}
505 		fallthrough;
506 	case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
507 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
508 		part_uv->flags |= XPC_P_ENGAGED_UV;
509 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
510 		break;
511 
512 	case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
513 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
514 		part_uv->flags &= ~XPC_P_ENGAGED_UV;
515 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
516 		break;
517 
518 	default:
519 		dev_err(xpc_part, "received unknown activate_mq msg type=%d "
520 			"from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
521 
522 		/* get hb checker to deactivate from the remote partition */
523 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
524 		if (part_uv->act_state_req == 0)
525 			xpc_activate_IRQ_rcvd++;
526 		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
527 		part_uv->reason = xpBadMsgType;
528 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
529 
530 		(*wakeup_hb_checker)++;
531 		return;
532 	}
533 
534 	if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
535 	    part->remote_rp_ts_jiffies != 0) {
536 		/*
537 		 * ??? Does what we do here need to be sensitive to
538 		 * ??? act_state or remote_act_state?
539 		 */
540 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
541 		if (part_uv->act_state_req == 0)
542 			xpc_activate_IRQ_rcvd++;
543 		part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
544 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
545 
546 		(*wakeup_hb_checker)++;
547 	}
548 }
549 
550 static irqreturn_t
551 xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
552 {
553 	struct xpc_activate_mq_msghdr_uv *msg_hdr;
554 	short partid;
555 	struct xpc_partition *part;
556 	int wakeup_hb_checker = 0;
557 	int part_referenced;
558 
559 	while (1) {
560 		msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
561 		if (msg_hdr == NULL)
562 			break;
563 
564 		partid = msg_hdr->partid;
565 		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
566 			dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
567 				"received invalid partid=0x%x in message\n",
568 				partid);
569 		} else {
570 			part = &xpc_partitions[partid];
571 
572 			part_referenced = xpc_part_ref(part);
573 			xpc_handle_activate_mq_msg_uv(part, msg_hdr,
574 						      part_referenced,
575 						      &wakeup_hb_checker);
576 			if (part_referenced)
577 				xpc_part_deref(part);
578 		}
579 
580 		gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
581 	}
582 
583 	if (wakeup_hb_checker)
584 		wake_up_interruptible(&xpc_activate_IRQ_wq);
585 
586 	return IRQ_HANDLED;
587 }
588 
589 static enum xp_retval
590 xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
591 				unsigned long gru_mq_desc_gpa)
592 {
593 	enum xp_retval ret;
594 
595 	ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
596 			       sizeof(struct gru_message_queue_desc));
597 	if (ret == xpSuccess)
598 		gru_mq_desc->mq = NULL;
599 
600 	return ret;
601 }
602 
603 static enum xp_retval
604 xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
605 			 int msg_type)
606 {
607 	struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
608 	struct xpc_partition_uv *part_uv = &part->sn.uv;
609 	struct gru_message_queue_desc *gru_mq_desc;
610 	unsigned long irq_flags;
611 	enum xp_retval ret;
612 
613 	DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
614 
615 	msg_hdr->type = msg_type;
616 	msg_hdr->partid = xp_partition_id;
617 	msg_hdr->act_state = part->act_state;
618 	msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
619 
620 	mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
621 again:
622 	if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
623 		gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
624 		if (gru_mq_desc == NULL) {
625 			gru_mq_desc = kmalloc_obj(struct gru_message_queue_desc,
626 						  GFP_ATOMIC);
627 			if (gru_mq_desc == NULL) {
628 				ret = xpNoMemory;
629 				goto done;
630 			}
631 			part_uv->cached_activate_gru_mq_desc = gru_mq_desc;
632 		}
633 
634 		ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc,
635 						      part_uv->
636 						      activate_gru_mq_desc_gpa);
637 		if (ret != xpSuccess)
638 			goto done;
639 
640 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
641 		part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
642 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
643 	}
644 
645 	/* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
646 	ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg,
647 			       msg_size);
648 	if (ret != xpSuccess) {
649 		smp_rmb();	/* ensure a fresh copy of part_uv->flags */
650 		if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV))
651 			goto again;
652 	}
653 done:
654 	mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex);
655 	return ret;
656 }
657 
658 static void
659 xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
660 			      size_t msg_size, int msg_type)
661 {
662 	enum xp_retval ret;
663 
664 	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
665 	if (unlikely(ret != xpSuccess))
666 		XPC_DEACTIVATE_PARTITION(part, ret);
667 }
668 
669 static void
670 xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
671 			 void *msg, size_t msg_size, int msg_type)
672 {
673 	struct xpc_partition *part = &xpc_partitions[ch->partid];
674 	enum xp_retval ret;
675 
676 	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
677 	if (unlikely(ret != xpSuccess)) {
678 		if (irq_flags != NULL)
679 			spin_unlock_irqrestore(&ch->lock, *irq_flags);
680 
681 		XPC_DEACTIVATE_PARTITION(part, ret);
682 
683 		if (irq_flags != NULL)
684 			spin_lock_irqsave(&ch->lock, *irq_flags);
685 	}
686 }
687 
688 static void
689 xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
690 {
691 	unsigned long irq_flags;
692 	struct xpc_partition_uv *part_uv = &part->sn.uv;
693 
694 	/*
695 	 * !!! Make our side think that the remote partition sent an activate
696 	 * !!! mq message our way by doing what the activate IRQ handler would
697 	 * !!! do had one really been sent.
698 	 */
699 
700 	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
701 	if (part_uv->act_state_req == 0)
702 		xpc_activate_IRQ_rcvd++;
703 	part_uv->act_state_req = act_state_req;
704 	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
705 
706 	wake_up_interruptible(&xpc_activate_IRQ_wq);
707 }
708 
709 static enum xp_retval
710 xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
711 				  size_t *len)
712 {
713 	s64 status;
714 	enum xp_retval ret;
715 
716 	status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
717 					  (u64 *)len);
718 	if (status == BIOS_STATUS_SUCCESS)
719 		ret = xpSuccess;
720 	else if (status == BIOS_STATUS_MORE_PASSES)
721 		ret = xpNeedMoreInfo;
722 	else
723 		ret = xpBiosError;
724 
725 	return ret;
726 }
727 
728 static int
729 xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp)
730 {
731 	xpc_heartbeat_uv =
732 	    &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
733 	rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
734 	rp->sn.uv.activate_gru_mq_desc_gpa =
735 	    uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
736 	return 0;
737 }
738 
/* Intentionally empty: this implementation does nothing for @partid. */
static void
xpc_allow_hb_uv(short partid)
{
}
743 
/* Intentionally empty: this implementation does nothing for @partid. */
static void
xpc_disallow_hb_uv(short partid)
{
}
748 
/* Intentionally empty: this implementation has nothing to do here. */
static void
xpc_disallow_all_hbs_uv(void)
{
}
753 
754 static void
755 xpc_increment_heartbeat_uv(void)
756 {
757 	xpc_heartbeat_uv->value++;
758 }
759 
760 static void
761 xpc_offline_heartbeat_uv(void)
762 {
763 	xpc_increment_heartbeat_uv();
764 	xpc_heartbeat_uv->offline = 1;
765 }
766 
767 static void
768 xpc_online_heartbeat_uv(void)
769 {
770 	xpc_increment_heartbeat_uv();
771 	xpc_heartbeat_uv->offline = 0;
772 }
773 
774 static void
775 xpc_heartbeat_init_uv(void)
776 {
777 	xpc_heartbeat_uv->value = 1;
778 	xpc_heartbeat_uv->offline = 0;
779 }
780 
/* Stop heartbeating by taking the heartbeat offline. */
static void
xpc_heartbeat_exit_uv(void)
{
	xpc_offline_heartbeat_uv();
}
786 
787 static enum xp_retval
788 xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
789 {
790 	struct xpc_partition_uv *part_uv = &part->sn.uv;
791 	enum xp_retval ret;
792 
793 	ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
794 			       part_uv->heartbeat_gpa,
795 			       sizeof(struct xpc_heartbeat_uv));
796 	if (ret != xpSuccess)
797 		return ret;
798 
799 	if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
800 	    !part_uv->cached_heartbeat.offline) {
801 
802 		ret = xpNoHeartbeat;
803 	} else {
804 		part->last_heartbeat = part_uv->cached_heartbeat.value;
805 	}
806 	return ret;
807 }
808 
809 static void
810 xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
811 				    unsigned long remote_rp_gpa, int nasid)
812 {
813 	short partid = remote_rp->SAL_partid;
814 	struct xpc_partition *part = &xpc_partitions[partid];
815 	struct xpc_activate_mq_msg_activate_req_uv msg;
816 
817 	part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
818 	part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
819 	part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
820 	part->sn.uv.activate_gru_mq_desc_gpa =
821 	    remote_rp->sn.uv.activate_gru_mq_desc_gpa;
822 
823 	/*
824 	 * ??? Is it a good idea to make this conditional on what is
825 	 * ??? potentially stale state information?
826 	 */
827 	if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
828 		msg.rp_gpa = uv_gpa(xpc_rsvd_page);
829 		msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
830 		msg.activate_gru_mq_desc_gpa =
831 		    xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
832 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
833 					   XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
834 	}
835 
836 	if (part->act_state == XPC_P_AS_INACTIVE)
837 		xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
838 }
839 
/* Latch a local XPC_P_ASR_ACTIVATE_UV request for @part. */
static void
xpc_request_partition_reactivation_uv(struct xpc_partition *part)
{
	xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
}
845 
846 static void
847 xpc_request_partition_deactivation_uv(struct xpc_partition *part)
848 {
849 	struct xpc_activate_mq_msg_deactivate_req_uv msg;
850 
851 	/*
852 	 * ??? Is it a good idea to make this conditional on what is
853 	 * ??? potentially stale state information?
854 	 */
855 	if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
856 	    part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
857 
858 		msg.reason = part->reason;
859 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
860 					 XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
861 	}
862 }
863 
/* Cancelling a deactivation request requires no action here. */
static void
xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
{
	/* nothing needs to be done */
}
870 
871 static void
872 xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
873 {
874 	head->first = NULL;
875 	head->last = NULL;
876 	spin_lock_init(&head->lock);
877 	head->n_entries = 0;
878 }
879 
880 static void *
881 xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
882 {
883 	unsigned long irq_flags;
884 	struct xpc_fifo_entry_uv *first;
885 
886 	spin_lock_irqsave(&head->lock, irq_flags);
887 	first = head->first;
888 	if (head->first != NULL) {
889 		head->first = first->next;
890 		if (head->first == NULL)
891 			head->last = NULL;
892 
893 		head->n_entries--;
894 		BUG_ON(head->n_entries < 0);
895 
896 		first->next = NULL;
897 	}
898 	spin_unlock_irqrestore(&head->lock, irq_flags);
899 	return first;
900 }
901 
902 static void
903 xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
904 		      struct xpc_fifo_entry_uv *last)
905 {
906 	unsigned long irq_flags;
907 
908 	last->next = NULL;
909 	spin_lock_irqsave(&head->lock, irq_flags);
910 	if (head->last != NULL)
911 		head->last->next = last;
912 	else
913 		head->first = last;
914 	head->last = last;
915 	head->n_entries++;
916 	spin_unlock_irqrestore(&head->lock, irq_flags);
917 }
918 
919 static int
920 xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
921 {
922 	return head->n_entries;
923 }
924 
925 /*
926  * Setup the channel structures that are uv specific.
927  */
928 static enum xp_retval
929 xpc_setup_ch_structures_uv(struct xpc_partition *part)
930 {
931 	struct xpc_channel_uv *ch_uv;
932 	int ch_number;
933 
934 	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
935 		ch_uv = &part->channels[ch_number].sn.uv;
936 
937 		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
938 		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
939 	}
940 
941 	return xpSuccess;
942 }
943 
/*
 * Tear down the uv-specific channel structures of @part.
 */
static void
xpc_teardown_ch_structures_uv(struct xpc_partition *part)
{
	/* nothing needs to be done */
}
953 
954 static enum xp_retval
955 xpc_make_first_contact_uv(struct xpc_partition *part)
956 {
957 	struct xpc_activate_mq_msg_uv msg;
958 
959 	/*
960 	 * We send a sync msg to get the remote partition's remote_act_state
961 	 * updated to our current act_state which at this point should
962 	 * be XPC_P_AS_ACTIVATING.
963 	 */
964 	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
965 				      XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
966 
967 	while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) ||
968 		 (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) {
969 
970 		dev_dbg(xpc_part, "waiting to make first contact with "
971 			"partition %d\n", XPC_PARTID(part));
972 
973 		/* wait a 1/4 of a second or so */
974 		(void)msleep_interruptible(250);
975 
976 		if (part->act_state == XPC_P_AS_DEACTIVATING)
977 			return part->reason;
978 	}
979 
980 	return xpSuccess;
981 }
982 
983 static u64
984 xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
985 {
986 	unsigned long irq_flags;
987 	union xpc_channel_ctl_flags chctl;
988 
989 	spin_lock_irqsave(&part->chctl_lock, irq_flags);
990 	chctl = part->chctl;
991 	if (chctl.all_flags != 0)
992 		part->chctl.all_flags = 0;
993 
994 	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
995 	return chctl.all_flags;
996 }
997 
998 static enum xp_retval
999 xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
1000 {
1001 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1002 	struct xpc_send_msg_slot_uv *msg_slot;
1003 	unsigned long irq_flags;
1004 	int nentries;
1005 	int entry;
1006 	size_t nbytes;
1007 
1008 	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
1009 		nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
1010 		ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1011 		if (ch_uv->send_msg_slots == NULL)
1012 			continue;
1013 
1014 		for (entry = 0; entry < nentries; entry++) {
1015 			msg_slot = &ch_uv->send_msg_slots[entry];
1016 
1017 			msg_slot->msg_slot_number = entry;
1018 			xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
1019 					      &msg_slot->next);
1020 		}
1021 
1022 		spin_lock_irqsave(&ch->lock, irq_flags);
1023 		if (nentries < ch->local_nentries)
1024 			ch->local_nentries = nentries;
1025 		spin_unlock_irqrestore(&ch->lock, irq_flags);
1026 		return xpSuccess;
1027 	}
1028 
1029 	return xpNoMemory;
1030 }
1031 
1032 static enum xp_retval
1033 xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
1034 {
1035 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1036 	struct xpc_notify_mq_msg_uv *msg_slot;
1037 	unsigned long irq_flags;
1038 	int nentries;
1039 	int entry;
1040 	size_t nbytes;
1041 
1042 	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
1043 		nbytes = nentries * ch->entry_size;
1044 		ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1045 		if (ch_uv->recv_msg_slots == NULL)
1046 			continue;
1047 
1048 		for (entry = 0; entry < nentries; entry++) {
1049 			msg_slot = ch_uv->recv_msg_slots +
1050 			    entry * ch->entry_size;
1051 
1052 			msg_slot->hdr.msg_slot_number = entry;
1053 		}
1054 
1055 		spin_lock_irqsave(&ch->lock, irq_flags);
1056 		if (nentries < ch->remote_nentries)
1057 			ch->remote_nentries = nentries;
1058 		spin_unlock_irqrestore(&ch->lock, irq_flags);
1059 		return xpSuccess;
1060 	}
1061 
1062 	return xpNoMemory;
1063 }
1064 
1065 /*
1066  * Allocate msg_slots associated with the channel.
1067  */
1068 static enum xp_retval
1069 xpc_setup_msg_structures_uv(struct xpc_channel *ch)
1070 {
1071 	static enum xp_retval ret;
1072 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1073 
1074 	DBUG_ON(ch->flags & XPC_C_SETUP);
1075 
1076 	ch_uv->cached_notify_gru_mq_desc = kmalloc_obj(struct gru_message_queue_desc);
1077 	if (ch_uv->cached_notify_gru_mq_desc == NULL)
1078 		return xpNoMemory;
1079 
1080 	ret = xpc_allocate_send_msg_slot_uv(ch);
1081 	if (ret == xpSuccess) {
1082 
1083 		ret = xpc_allocate_recv_msg_slot_uv(ch);
1084 		if (ret != xpSuccess) {
1085 			kfree(ch_uv->send_msg_slots);
1086 			xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1087 		}
1088 	}
1089 	return ret;
1090 }
1091 
1092 /*
1093  * Free up msg_slots and clear other stuff that were setup for the specified
1094  * channel.
1095  */
1096 static void
1097 xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
1098 {
1099 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1100 
1101 	lockdep_assert_held(&ch->lock);
1102 
1103 	kfree(ch_uv->cached_notify_gru_mq_desc);
1104 	ch_uv->cached_notify_gru_mq_desc = NULL;
1105 
1106 	if (ch->flags & XPC_C_SETUP) {
1107 		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1108 		kfree(ch_uv->send_msg_slots);
1109 		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
1110 		kfree(ch_uv->recv_msg_slots);
1111 	}
1112 }
1113 
1114 static void
1115 xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1116 {
1117 	struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
1118 
1119 	msg.ch_number = ch->number;
1120 	msg.reason = ch->reason;
1121 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1122 				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
1123 }
1124 
1125 static void
1126 xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1127 {
1128 	struct xpc_activate_mq_msg_chctl_closereply_uv msg;
1129 
1130 	msg.ch_number = ch->number;
1131 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1132 				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
1133 }
1134 
1135 static void
1136 xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1137 {
1138 	struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
1139 
1140 	msg.ch_number = ch->number;
1141 	msg.entry_size = ch->entry_size;
1142 	msg.local_nentries = ch->local_nentries;
1143 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1144 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
1145 }
1146 
1147 static void
1148 xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1149 {
1150 	struct xpc_activate_mq_msg_chctl_openreply_uv msg;
1151 
1152 	msg.ch_number = ch->number;
1153 	msg.local_nentries = ch->local_nentries;
1154 	msg.remote_nentries = ch->remote_nentries;
1155 	msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc);
1156 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1157 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
1158 }
1159 
1160 static void
1161 xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1162 {
1163 	struct xpc_activate_mq_msg_chctl_opencomplete_uv msg;
1164 
1165 	msg.ch_number = ch->number;
1166 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1167 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV);
1168 }
1169 
1170 static void
1171 xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
1172 {
1173 	unsigned long irq_flags;
1174 
1175 	spin_lock_irqsave(&part->chctl_lock, irq_flags);
1176 	part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
1177 	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
1178 
1179 	xpc_wakeup_channel_mgr(part);
1180 }
1181 
1182 static enum xp_retval
1183 xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
1184 			       unsigned long gru_mq_desc_gpa)
1185 {
1186 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1187 
1188 	DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL);
1189 	return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc,
1190 					       gru_mq_desc_gpa);
1191 }
1192 
1193 static void
1194 xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
1195 {
1196 	struct xpc_activate_mq_msg_uv msg;
1197 
1198 	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1199 				      XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
1200 }
1201 
1202 static void
1203 xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
1204 {
1205 	struct xpc_activate_mq_msg_uv msg;
1206 
1207 	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1208 				      XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
1209 }
1210 
1211 static void
1212 xpc_assume_partition_disengaged_uv(short partid)
1213 {
1214 	struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
1215 	unsigned long irq_flags;
1216 
1217 	spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
1218 	part_uv->flags &= ~XPC_P_ENGAGED_UV;
1219 	spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
1220 }
1221 
1222 static int
1223 xpc_partition_engaged_uv(short partid)
1224 {
1225 	return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
1226 }
1227 
1228 static int
1229 xpc_any_partition_engaged_uv(void)
1230 {
1231 	struct xpc_partition_uv *part_uv;
1232 	short partid;
1233 
1234 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
1235 		part_uv = &xpc_partitions[partid].sn.uv;
1236 		if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
1237 			return 1;
1238 	}
1239 	return 0;
1240 }
1241 
1242 static enum xp_retval
1243 xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
1244 			 struct xpc_send_msg_slot_uv **address_of_msg_slot)
1245 {
1246 	enum xp_retval ret;
1247 	struct xpc_send_msg_slot_uv *msg_slot;
1248 	struct xpc_fifo_entry_uv *entry;
1249 
1250 	while (1) {
1251 		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
1252 		if (entry != NULL)
1253 			break;
1254 
1255 		if (flags & XPC_NOWAIT)
1256 			return xpNoWait;
1257 
1258 		ret = xpc_allocate_msg_wait(ch);
1259 		if (ret != xpInterrupted && ret != xpTimeout)
1260 			return ret;
1261 	}
1262 
1263 	msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
1264 	*address_of_msg_slot = msg_slot;
1265 	return xpSuccess;
1266 }
1267 
1268 static void
1269 xpc_free_msg_slot_uv(struct xpc_channel *ch,
1270 		     struct xpc_send_msg_slot_uv *msg_slot)
1271 {
1272 	xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
1273 
1274 	/* wakeup anyone waiting for a free msg slot */
1275 	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1276 		wake_up(&ch->msg_allocate_wq);
1277 }
1278 
1279 static void
1280 xpc_notify_sender_uv(struct xpc_channel *ch,
1281 		     struct xpc_send_msg_slot_uv *msg_slot,
1282 		     enum xp_retval reason)
1283 {
1284 	xpc_notify_func func = msg_slot->func;
1285 
1286 	if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
1287 
1288 		atomic_dec(&ch->n_to_notify);
1289 
1290 		dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
1291 			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1292 			msg_slot->msg_slot_number, ch->partid, ch->number);
1293 
1294 		func(reason, ch->partid, ch->number, msg_slot->key);
1295 
1296 		dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
1297 			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1298 			msg_slot->msg_slot_number, ch->partid, ch->number);
1299 	}
1300 }
1301 
1302 static void
1303 xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
1304 			    struct xpc_notify_mq_msg_uv *msg)
1305 {
1306 	struct xpc_send_msg_slot_uv *msg_slot;
1307 	int entry = msg->hdr.msg_slot_number % ch->local_nentries;
1308 
1309 	msg_slot = &ch->sn.uv.send_msg_slots[entry];
1310 
1311 	BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
1312 	msg_slot->msg_slot_number += ch->local_nentries;
1313 
1314 	if (msg_slot->func != NULL)
1315 		xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
1316 
1317 	xpc_free_msg_slot_uv(ch, msg_slot);
1318 }
1319 
1320 static void
1321 xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
1322 			    struct xpc_notify_mq_msg_uv *msg)
1323 {
1324 	struct xpc_partition_uv *part_uv = &part->sn.uv;
1325 	struct xpc_channel *ch;
1326 	struct xpc_channel_uv *ch_uv;
1327 	struct xpc_notify_mq_msg_uv *msg_slot;
1328 	unsigned long irq_flags;
1329 	int ch_number = msg->hdr.ch_number;
1330 
1331 	if (unlikely(ch_number >= part->nchannels)) {
1332 		dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
1333 			"channel number=0x%x in message from partid=%d\n",
1334 			ch_number, XPC_PARTID(part));
1335 
1336 		/* get hb checker to deactivate from the remote partition */
1337 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1338 		if (part_uv->act_state_req == 0)
1339 			xpc_activate_IRQ_rcvd++;
1340 		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
1341 		part_uv->reason = xpBadChannelNumber;
1342 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1343 
1344 		wake_up_interruptible(&xpc_activate_IRQ_wq);
1345 		return;
1346 	}
1347 
1348 	ch = &part->channels[ch_number];
1349 	xpc_msgqueue_ref(ch);
1350 
1351 	if (!(ch->flags & XPC_C_CONNECTED)) {
1352 		xpc_msgqueue_deref(ch);
1353 		return;
1354 	}
1355 
1356 	/* see if we're really dealing with an ACK for a previously sent msg */
1357 	if (msg->hdr.size == 0) {
1358 		xpc_handle_notify_mq_ack_uv(ch, msg);
1359 		xpc_msgqueue_deref(ch);
1360 		return;
1361 	}
1362 
1363 	/* we're dealing with a normal message sent via the notify_mq */
1364 	ch_uv = &ch->sn.uv;
1365 
1366 	msg_slot = ch_uv->recv_msg_slots +
1367 	    (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size;
1368 
1369 	BUG_ON(msg_slot->hdr.size != 0);
1370 
1371 	memcpy(msg_slot, msg, msg->hdr.size);
1372 
1373 	xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
1374 
1375 	if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
1376 		/*
1377 		 * If there is an existing idle kthread get it to deliver
1378 		 * the payload, otherwise we'll have to get the channel mgr
1379 		 * for this partition to create a kthread to do the delivery.
1380 		 */
1381 		if (atomic_read(&ch->kthreads_idle) > 0)
1382 			wake_up_nr(&ch->idle_wq, 1);
1383 		else
1384 			xpc_send_chctl_local_msgrequest_uv(part, ch->number);
1385 	}
1386 	xpc_msgqueue_deref(ch);
1387 }
1388 
1389 static irqreturn_t
1390 xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
1391 {
1392 	struct xpc_notify_mq_msg_uv *msg;
1393 	short partid;
1394 	struct xpc_partition *part;
1395 
1396 	while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) !=
1397 	       NULL) {
1398 
1399 		partid = msg->hdr.partid;
1400 		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
1401 			dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
1402 				"invalid partid=0x%x in message\n", partid);
1403 		} else {
1404 			part = &xpc_partitions[partid];
1405 
1406 			if (xpc_part_ref(part)) {
1407 				xpc_handle_notify_mq_msg_uv(part, msg);
1408 				xpc_part_deref(part);
1409 			}
1410 		}
1411 
1412 		gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg);
1413 	}
1414 
1415 	return IRQ_HANDLED;
1416 }
1417 
1418 static int
1419 xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
1420 {
1421 	return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
1422 }
1423 
1424 static void
1425 xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
1426 {
1427 	struct xpc_channel *ch = &part->channels[ch_number];
1428 	int ndeliverable_payloads;
1429 
1430 	xpc_msgqueue_ref(ch);
1431 
1432 	ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
1433 
1434 	if (ndeliverable_payloads > 0 &&
1435 	    (ch->flags & XPC_C_CONNECTED) &&
1436 	    (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
1437 
1438 		xpc_activate_kthreads(ch, ndeliverable_payloads);
1439 	}
1440 
1441 	xpc_msgqueue_deref(ch);
1442 }
1443 
1444 static enum xp_retval
1445 xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
1446 		    u16 payload_size, u8 notify_type, xpc_notify_func func,
1447 		    void *key)
1448 {
1449 	enum xp_retval ret = xpSuccess;
1450 	struct xpc_send_msg_slot_uv *msg_slot = NULL;
1451 	struct xpc_notify_mq_msg_uv *msg;
1452 	u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
1453 	size_t msg_size;
1454 
1455 	DBUG_ON(notify_type != XPC_N_CALL);
1456 
1457 	msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
1458 	if (msg_size > ch->entry_size)
1459 		return xpPayloadTooBig;
1460 
1461 	xpc_msgqueue_ref(ch);
1462 
1463 	if (ch->flags & XPC_C_DISCONNECTING) {
1464 		ret = ch->reason;
1465 		goto out_1;
1466 	}
1467 	if (!(ch->flags & XPC_C_CONNECTED)) {
1468 		ret = xpNotConnected;
1469 		goto out_1;
1470 	}
1471 
1472 	ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
1473 	if (ret != xpSuccess)
1474 		goto out_1;
1475 
1476 	if (func != NULL) {
1477 		atomic_inc(&ch->n_to_notify);
1478 
1479 		msg_slot->key = key;
1480 		smp_wmb(); /* a non-NULL func must hit memory after the key */
1481 		msg_slot->func = func;
1482 
1483 		if (ch->flags & XPC_C_DISCONNECTING) {
1484 			ret = ch->reason;
1485 			goto out_2;
1486 		}
1487 	}
1488 
1489 	msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
1490 	msg->hdr.partid = xp_partition_id;
1491 	msg->hdr.ch_number = ch->number;
1492 	msg->hdr.size = msg_size;
1493 	msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
1494 	memcpy(&msg->payload, payload, payload_size);
1495 
1496 	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1497 			       msg_size);
1498 	if (ret == xpSuccess)
1499 		goto out_1;
1500 
1501 	XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1502 out_2:
1503 	if (func != NULL) {
1504 		/*
1505 		 * Try to NULL the msg_slot's func field. If we fail, then
1506 		 * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
1507 		 * case we need to pretend we succeeded to send the message
1508 		 * since the user will get a callout for the disconnect error
1509 		 * by xpc_notify_senders_of_disconnect_uv(), and to also get an
1510 		 * error returned here will confuse them. Additionally, since
1511 		 * in this case the channel is being disconnected we don't need
1512 		 * to put the msg_slot back on the free list.
1513 		 */
1514 		if (cmpxchg(&msg_slot->func, func, NULL) != func) {
1515 			ret = xpSuccess;
1516 			goto out_1;
1517 		}
1518 
1519 		msg_slot->key = NULL;
1520 		atomic_dec(&ch->n_to_notify);
1521 	}
1522 	xpc_free_msg_slot_uv(ch, msg_slot);
1523 out_1:
1524 	xpc_msgqueue_deref(ch);
1525 	return ret;
1526 }
1527 
1528 /*
1529  * Tell the callers of xpc_send_notify() that the status of their payloads
1530  * is unknown because the channel is now disconnecting.
1531  *
1532  * We don't worry about putting these msg_slots on the free list since the
1533  * msg_slots themselves are about to be kfree'd.
1534  */
1535 static void
1536 xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
1537 {
1538 	struct xpc_send_msg_slot_uv *msg_slot;
1539 	int entry;
1540 
1541 	DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
1542 
1543 	for (entry = 0; entry < ch->local_nentries; entry++) {
1544 
1545 		if (atomic_read(&ch->n_to_notify) == 0)
1546 			break;
1547 
1548 		msg_slot = &ch->sn.uv.send_msg_slots[entry];
1549 		if (msg_slot->func != NULL)
1550 			xpc_notify_sender_uv(ch, msg_slot, ch->reason);
1551 	}
1552 }
1553 
1554 /*
1555  * Get the next deliverable message's payload.
1556  */
1557 static void *
1558 xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
1559 {
1560 	struct xpc_fifo_entry_uv *entry;
1561 	struct xpc_notify_mq_msg_uv *msg;
1562 	void *payload = NULL;
1563 
1564 	if (!(ch->flags & XPC_C_DISCONNECTING)) {
1565 		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
1566 		if (entry != NULL) {
1567 			msg = container_of(entry, struct xpc_notify_mq_msg_uv,
1568 					   hdr.u.next);
1569 			payload = &msg->payload;
1570 		}
1571 	}
1572 	return payload;
1573 }
1574 
1575 static void
1576 xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
1577 {
1578 	struct xpc_notify_mq_msg_uv *msg;
1579 	enum xp_retval ret;
1580 
1581 	msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
1582 
1583 	/* return an ACK to the sender of this message */
1584 
1585 	msg->hdr.partid = xp_partition_id;
1586 	msg->hdr.size = 0;	/* size of zero indicates this is an ACK */
1587 
1588 	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1589 			       sizeof(struct xpc_notify_mq_msghdr_uv));
1590 	if (ret != xpSuccess)
1591 		XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1592 }
1593 
1594 static const struct xpc_arch_operations xpc_arch_ops_uv = {
1595 	.setup_partitions = xpc_setup_partitions_uv,
1596 	.teardown_partitions = xpc_teardown_partitions_uv,
1597 	.process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv,
1598 	.get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv,
1599 	.setup_rsvd_page = xpc_setup_rsvd_page_uv,
1600 
1601 	.allow_hb = xpc_allow_hb_uv,
1602 	.disallow_hb = xpc_disallow_hb_uv,
1603 	.disallow_all_hbs = xpc_disallow_all_hbs_uv,
1604 	.increment_heartbeat = xpc_increment_heartbeat_uv,
1605 	.offline_heartbeat = xpc_offline_heartbeat_uv,
1606 	.online_heartbeat = xpc_online_heartbeat_uv,
1607 	.heartbeat_init = xpc_heartbeat_init_uv,
1608 	.heartbeat_exit = xpc_heartbeat_exit_uv,
1609 	.get_remote_heartbeat = xpc_get_remote_heartbeat_uv,
1610 
1611 	.request_partition_activation =
1612 		xpc_request_partition_activation_uv,
1613 	.request_partition_reactivation =
1614 		xpc_request_partition_reactivation_uv,
1615 	.request_partition_deactivation =
1616 		xpc_request_partition_deactivation_uv,
1617 	.cancel_partition_deactivation_request =
1618 		xpc_cancel_partition_deactivation_request_uv,
1619 
1620 	.setup_ch_structures = xpc_setup_ch_structures_uv,
1621 	.teardown_ch_structures = xpc_teardown_ch_structures_uv,
1622 
1623 	.make_first_contact = xpc_make_first_contact_uv,
1624 
1625 	.get_chctl_all_flags = xpc_get_chctl_all_flags_uv,
1626 	.send_chctl_closerequest = xpc_send_chctl_closerequest_uv,
1627 	.send_chctl_closereply = xpc_send_chctl_closereply_uv,
1628 	.send_chctl_openrequest = xpc_send_chctl_openrequest_uv,
1629 	.send_chctl_openreply = xpc_send_chctl_openreply_uv,
1630 	.send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv,
1631 	.process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv,
1632 
1633 	.save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv,
1634 
1635 	.setup_msg_structures = xpc_setup_msg_structures_uv,
1636 	.teardown_msg_structures = xpc_teardown_msg_structures_uv,
1637 
1638 	.indicate_partition_engaged = xpc_indicate_partition_engaged_uv,
1639 	.indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv,
1640 	.assume_partition_disengaged = xpc_assume_partition_disengaged_uv,
1641 	.partition_engaged = xpc_partition_engaged_uv,
1642 	.any_partition_engaged = xpc_any_partition_engaged_uv,
1643 
1644 	.n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv,
1645 	.send_payload = xpc_send_payload_uv,
1646 	.get_deliverable_payload = xpc_get_deliverable_payload_uv,
1647 	.received_payload = xpc_received_payload_uv,
1648 	.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
1649 };
1650 
1651 static int
1652 xpc_init_mq_node(int nid)
1653 {
1654 	int cpu;
1655 
1656 	cpus_read_lock();
1657 
1658 	for_each_cpu(cpu, cpumask_of_node(nid)) {
1659 		xpc_activate_mq_uv =
1660 			xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid,
1661 					     XPC_ACTIVATE_IRQ_NAME,
1662 					     xpc_handle_activate_IRQ_uv);
1663 		if (!IS_ERR(xpc_activate_mq_uv))
1664 			break;
1665 	}
1666 	if (IS_ERR(xpc_activate_mq_uv)) {
1667 		cpus_read_unlock();
1668 		return PTR_ERR(xpc_activate_mq_uv);
1669 	}
1670 
1671 	for_each_cpu(cpu, cpumask_of_node(nid)) {
1672 		xpc_notify_mq_uv =
1673 			xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid,
1674 					     XPC_NOTIFY_IRQ_NAME,
1675 					     xpc_handle_notify_IRQ_uv);
1676 		if (!IS_ERR(xpc_notify_mq_uv))
1677 			break;
1678 	}
1679 	if (IS_ERR(xpc_notify_mq_uv)) {
1680 		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1681 		cpus_read_unlock();
1682 		return PTR_ERR(xpc_notify_mq_uv);
1683 	}
1684 
1685 	cpus_read_unlock();
1686 	return 0;
1687 }
1688 
1689 int
1690 xpc_init_uv(void)
1691 {
1692 	int nid;
1693 	int ret = 0;
1694 
1695 	xpc_arch_ops = xpc_arch_ops_uv;
1696 
1697 	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
1698 		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
1699 			XPC_MSG_HDR_MAX_SIZE);
1700 		return -E2BIG;
1701 	}
1702 
1703 	if (xpc_mq_node < 0)
1704 		for_each_online_node(nid) {
1705 			ret = xpc_init_mq_node(nid);
1706 
1707 			if (!ret)
1708 				break;
1709 		}
1710 	else
1711 		ret = xpc_init_mq_node(xpc_mq_node);
1712 
1713 	if (ret < 0)
1714 		dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n",
1715 			-ret);
1716 
1717 	return ret;
1718 }
1719 
1720 void
1721 xpc_exit_uv(void)
1722 {
1723 	xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
1724 	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1725 }
1726 
1727 module_param(xpc_mq_node, int, 0);
1728 MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");
1729