xref: /linux/drivers/misc/sgi-xp/xpc_uv.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8 
9 /*
10  * Cross Partition Communication (XPC) uv-based functions.
11  *
12  *     Architecture specific implementation of common functions.
13  *
14  */
15 
16 #include <linux/kernel.h>
17 #include <linux/mm.h>
18 #include <linux/interrupt.h>
19 #include <linux/delay.h>
20 #include <linux/device.h>
21 #include <linux/cpu.h>
22 #include <linux/module.h>
23 #include <linux/err.h>
24 #include <linux/slab.h>
25 #include <linux/numa.h>
26 #include <asm/uv/uv_hub.h>
27 #include <asm/uv/bios.h>
28 #include <asm/uv/uv_irq.h>
29 #include "../sgi-gru/gru.h"
30 #include "../sgi-gru/grukservices.h"
31 #include "xpc.h"
32 
33 static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
34 
35 #define XPC_ACTIVATE_MSG_SIZE_UV	(1 * GRU_CACHE_LINE_BYTES)
36 #define XPC_ACTIVATE_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
37 					 XPC_ACTIVATE_MSG_SIZE_UV)
38 #define XPC_ACTIVATE_IRQ_NAME		"xpc_activate"
39 
40 #define XPC_NOTIFY_MSG_SIZE_UV		(2 * GRU_CACHE_LINE_BYTES)
41 #define XPC_NOTIFY_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
42 					 XPC_NOTIFY_MSG_SIZE_UV)
43 #define XPC_NOTIFY_IRQ_NAME		"xpc_notify"
44 
45 static int xpc_mq_node = NUMA_NO_NODE;
46 
47 static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
48 static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
49 
50 static int
51 xpc_setup_partitions_uv(void)
52 {
53 	short partid;
54 	struct xpc_partition_uv *part_uv;
55 
56 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
57 		part_uv = &xpc_partitions[partid].sn.uv;
58 
59 		mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex);
60 		spin_lock_init(&part_uv->flags_lock);
61 		part_uv->remote_act_state = XPC_P_AS_INACTIVE;
62 	}
63 	return 0;
64 }
65 
66 static void
67 xpc_teardown_partitions_uv(void)
68 {
69 	short partid;
70 	struct xpc_partition_uv *part_uv;
71 	unsigned long irq_flags;
72 
73 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
74 		part_uv = &xpc_partitions[partid].sn.uv;
75 
76 		if (part_uv->cached_activate_gru_mq_desc != NULL) {
77 			mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
78 			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
79 			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
80 			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
81 			kfree(part_uv->cached_activate_gru_mq_desc);
82 			part_uv->cached_activate_gru_mq_desc = NULL;
83 			mutex_unlock(&part_uv->
84 				     cached_activate_gru_mq_desc_mutex);
85 		}
86 	}
87 }
88 
89 static int
90 xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
91 {
92 	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
93 
94 	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
95 			UV_AFFINITY_CPU);
96 	if (mq->irq < 0)
97 		return mq->irq;
98 
99 	mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
100 
101 	return 0;
102 }
103 
104 static void
105 xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
106 {
107 	uv_teardown_irq(mq->irq);
108 }
109 
110 static int
111 xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
112 {
113 	int ret;
114 
115 	ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address),
116 					 mq->order, &mq->mmr_offset);
117 	if (ret < 0) {
118 		dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
119 			"ret=%d\n", ret);
120 		return ret;
121 	}
122 
123 	mq->watchlist_num = ret;
124 	return 0;
125 }
126 
127 static void
128 xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
129 {
130 	int ret;
131 	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
132 
133 	ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
134 	BUG_ON(ret != BIOS_STATUS_SUCCESS);
135 }
136 
137 static struct xpc_gru_mq_uv *
138 xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
139 		     irq_handler_t irq_handler)
140 {
141 	enum xp_retval xp_ret;
142 	int ret;
143 	int nid;
144 	int nasid;
145 	int pg_order;
146 	struct page *page;
147 	struct xpc_gru_mq_uv *mq;
148 	struct uv_IO_APIC_route_entry *mmr_value;
149 
150 	mq = kmalloc_obj(struct xpc_gru_mq_uv);
151 	if (mq == NULL) {
152 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
153 			"a xpc_gru_mq_uv structure\n");
154 		ret = -ENOMEM;
155 		goto out_0;
156 	}
157 
158 	mq->gru_mq_desc = kzalloc_obj(struct gru_message_queue_desc);
159 	if (mq->gru_mq_desc == NULL) {
160 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
161 			"a gru_message_queue_desc structure\n");
162 		ret = -ENOMEM;
163 		goto out_1;
164 	}
165 
166 	pg_order = get_order(mq_size);
167 	mq->order = pg_order + PAGE_SHIFT;
168 	mq_size = 1UL << mq->order;
169 
170 	mq->mmr_blade = uv_cpu_to_blade_id(cpu);
171 
172 	nid = cpu_to_node(cpu);
173 	page = __alloc_pages_node(nid,
174 				      GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
175 				      pg_order);
176 	if (page == NULL) {
177 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
178 			"bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
179 		ret = -ENOMEM;
180 		goto out_2;
181 	}
182 	mq->address = page_address(page);
183 
184 	/* enable generation of irq when GRU mq operation occurs to this mq */
185 	ret = xpc_gru_mq_watchlist_alloc_uv(mq);
186 	if (ret != 0)
187 		goto out_3;
188 
189 	ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
190 	if (ret != 0)
191 		goto out_4;
192 
193 	ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
194 	if (ret != 0) {
195 		dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
196 			mq->irq, -ret);
197 		goto out_5;
198 	}
199 
200 	nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu));
201 
202 	mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value;
203 	ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size,
204 				     nasid, mmr_value->vector, mmr_value->dest);
205 	if (ret != 0) {
206 		dev_err(xpc_part, "gru_create_message_queue() returned "
207 			"error=%d\n", ret);
208 		ret = -EINVAL;
209 		goto out_6;
210 	}
211 
212 	/* allow other partitions to access this GRU mq */
213 	xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
214 	if (xp_ret != xpSuccess) {
215 		ret = -EACCES;
216 		goto out_6;
217 	}
218 
219 	return mq;
220 
221 	/* something went wrong */
222 out_6:
223 	free_irq(mq->irq, NULL);
224 out_5:
225 	xpc_release_gru_mq_irq_uv(mq);
226 out_4:
227 	xpc_gru_mq_watchlist_free_uv(mq);
228 out_3:
229 	free_pages((unsigned long)mq->address, pg_order);
230 out_2:
231 	kfree(mq->gru_mq_desc);
232 out_1:
233 	kfree(mq);
234 out_0:
235 	return ERR_PTR(ret);
236 }
237 
238 static void
239 xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
240 {
241 	unsigned int mq_size;
242 	int pg_order;
243 	int ret;
244 
245 	/* disallow other partitions to access GRU mq */
246 	mq_size = 1UL << mq->order;
247 	ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
248 	BUG_ON(ret != xpSuccess);
249 
250 	/* unregister irq handler and release mq irq/vector mapping */
251 	free_irq(mq->irq, NULL);
252 	xpc_release_gru_mq_irq_uv(mq);
253 
254 	/* disable generation of irq when GRU mq op occurs to this mq */
255 	xpc_gru_mq_watchlist_free_uv(mq);
256 
257 	pg_order = mq->order - PAGE_SHIFT;
258 	free_pages((unsigned long)mq->address, pg_order);
259 
260 	kfree(mq);
261 }
262 
263 static enum xp_retval
264 xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg,
265 		 size_t msg_size)
266 {
267 	enum xp_retval xp_ret;
268 	int ret;
269 
270 	while (1) {
271 		ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size);
272 		if (ret == MQE_OK) {
273 			xp_ret = xpSuccess;
274 			break;
275 		}
276 
277 		if (ret == MQE_QUEUE_FULL) {
278 			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
279 				"error=MQE_QUEUE_FULL\n");
280 			/* !!! handle QLimit reached; delay & try again */
281 			/* ??? Do we add a limit to the number of retries? */
282 			(void)msleep_interruptible(10);
283 		} else if (ret == MQE_CONGESTION) {
284 			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
285 				"error=MQE_CONGESTION\n");
286 			/* !!! handle LB Overflow; simply try again */
287 			/* ??? Do we add a limit to the number of retries? */
288 		} else {
289 			/* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
290 			dev_err(xpc_chan, "gru_send_message_gpa() returned "
291 				"error=%d\n", ret);
292 			xp_ret = xpGruSendMqError;
293 			break;
294 		}
295 	}
296 	return xp_ret;
297 }
298 
299 static void
300 xpc_process_activate_IRQ_rcvd_uv(void)
301 {
302 	unsigned long irq_flags;
303 	short partid;
304 	struct xpc_partition *part;
305 	u8 act_state_req;
306 
307 	DBUG_ON(xpc_activate_IRQ_rcvd == 0);
308 
309 	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
310 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
311 		part = &xpc_partitions[partid];
312 
313 		if (part->sn.uv.act_state_req == 0)
314 			continue;
315 
316 		xpc_activate_IRQ_rcvd--;
317 		BUG_ON(xpc_activate_IRQ_rcvd < 0);
318 
319 		act_state_req = part->sn.uv.act_state_req;
320 		part->sn.uv.act_state_req = 0;
321 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
322 
323 		if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
324 			if (part->act_state == XPC_P_AS_INACTIVE)
325 				xpc_activate_partition(part);
326 			else if (part->act_state == XPC_P_AS_DEACTIVATING)
327 				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
328 
329 		} else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
330 			if (part->act_state == XPC_P_AS_INACTIVE)
331 				xpc_activate_partition(part);
332 			else
333 				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
334 
335 		} else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
336 			XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
337 
338 		} else {
339 			BUG();
340 		}
341 
342 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
343 		if (xpc_activate_IRQ_rcvd == 0)
344 			break;
345 	}
346 	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
347 
348 }
349 
350 static void
351 xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
352 			      struct xpc_activate_mq_msghdr_uv *msg_hdr,
353 			      int part_setup,
354 			      int *wakeup_hb_checker)
355 {
356 	unsigned long irq_flags;
357 	struct xpc_partition_uv *part_uv = &part->sn.uv;
358 	struct xpc_openclose_args *args;
359 
360 	part_uv->remote_act_state = msg_hdr->act_state;
361 
362 	switch (msg_hdr->type) {
363 	case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
364 		/* syncing of remote_act_state was just done above */
365 		break;
366 
367 	case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
368 		struct xpc_activate_mq_msg_activate_req_uv *msg;
369 
370 		/*
371 		 * ??? Do we deal here with ts_jiffies being different
372 		 * ??? if act_state != XPC_P_AS_INACTIVE instead of
373 		 * ??? below?
374 		 */
375 		msg = container_of(msg_hdr, struct
376 				   xpc_activate_mq_msg_activate_req_uv, hdr);
377 
378 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
379 		if (part_uv->act_state_req == 0)
380 			xpc_activate_IRQ_rcvd++;
381 		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
382 		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
383 		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
384 		part_uv->heartbeat_gpa = msg->heartbeat_gpa;
385 
386 		if (msg->activate_gru_mq_desc_gpa !=
387 		    part_uv->activate_gru_mq_desc_gpa) {
388 			spin_lock(&part_uv->flags_lock);
389 			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
390 			spin_unlock(&part_uv->flags_lock);
391 			part_uv->activate_gru_mq_desc_gpa =
392 			    msg->activate_gru_mq_desc_gpa;
393 		}
394 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
395 
396 		(*wakeup_hb_checker)++;
397 		break;
398 	}
399 	case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
400 		struct xpc_activate_mq_msg_deactivate_req_uv *msg;
401 
402 		msg = container_of(msg_hdr, struct
403 				   xpc_activate_mq_msg_deactivate_req_uv, hdr);
404 
405 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
406 		if (part_uv->act_state_req == 0)
407 			xpc_activate_IRQ_rcvd++;
408 		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
409 		part_uv->reason = msg->reason;
410 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
411 
412 		(*wakeup_hb_checker)++;
413 		return;
414 	}
415 	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
416 		struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
417 
418 		if (!part_setup)
419 			break;
420 
421 		msg = container_of(msg_hdr, struct
422 				   xpc_activate_mq_msg_chctl_closerequest_uv,
423 				   hdr);
424 		args = &part->remote_openclose_args[msg->ch_number];
425 		args->reason = msg->reason;
426 
427 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
428 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
429 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
430 
431 		xpc_wakeup_channel_mgr(part);
432 		break;
433 	}
434 	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
435 		struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
436 
437 		if (!part_setup)
438 			break;
439 
440 		msg = container_of(msg_hdr, struct
441 				   xpc_activate_mq_msg_chctl_closereply_uv,
442 				   hdr);
443 
444 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
445 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
446 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
447 
448 		xpc_wakeup_channel_mgr(part);
449 		break;
450 	}
451 	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
452 		struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
453 
454 		if (!part_setup)
455 			break;
456 
457 		msg = container_of(msg_hdr, struct
458 				   xpc_activate_mq_msg_chctl_openrequest_uv,
459 				   hdr);
460 		args = &part->remote_openclose_args[msg->ch_number];
461 		args->entry_size = msg->entry_size;
462 		args->local_nentries = msg->local_nentries;
463 
464 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
465 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
466 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
467 
468 		xpc_wakeup_channel_mgr(part);
469 		break;
470 	}
471 	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
472 		struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
473 
474 		if (!part_setup)
475 			break;
476 
477 		msg = container_of(msg_hdr, struct
478 				   xpc_activate_mq_msg_chctl_openreply_uv, hdr);
479 		args = &part->remote_openclose_args[msg->ch_number];
480 		args->remote_nentries = msg->remote_nentries;
481 		args->local_nentries = msg->local_nentries;
482 		args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa;
483 
484 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
485 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
486 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
487 
488 		xpc_wakeup_channel_mgr(part);
489 		break;
490 	}
491 	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: {
492 		struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg;
493 
494 		if (!part_setup)
495 			break;
496 
497 		msg = container_of(msg_hdr, struct
498 				xpc_activate_mq_msg_chctl_opencomplete_uv, hdr);
499 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
500 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE;
501 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
502 
503 		xpc_wakeup_channel_mgr(part);
504 	}
505 		fallthrough;
506 	case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
507 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
508 		part_uv->flags |= XPC_P_ENGAGED_UV;
509 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
510 		break;
511 
512 	case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
513 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
514 		part_uv->flags &= ~XPC_P_ENGAGED_UV;
515 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
516 		break;
517 
518 	default:
519 		dev_err(xpc_part, "received unknown activate_mq msg type=%d "
520 			"from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
521 
522 		/* get hb checker to deactivate from the remote partition */
523 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
524 		if (part_uv->act_state_req == 0)
525 			xpc_activate_IRQ_rcvd++;
526 		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
527 		part_uv->reason = xpBadMsgType;
528 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
529 
530 		(*wakeup_hb_checker)++;
531 		return;
532 	}
533 
534 	if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
535 	    part->remote_rp_ts_jiffies != 0) {
536 		/*
537 		 * ??? Does what we do here need to be sensitive to
538 		 * ??? act_state or remote_act_state?
539 		 */
540 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
541 		if (part_uv->act_state_req == 0)
542 			xpc_activate_IRQ_rcvd++;
543 		part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
544 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
545 
546 		(*wakeup_hb_checker)++;
547 	}
548 }
549 
550 static irqreturn_t
551 xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
552 {
553 	struct xpc_activate_mq_msghdr_uv *msg_hdr;
554 	short partid;
555 	struct xpc_partition *part;
556 	int wakeup_hb_checker = 0;
557 	int part_referenced;
558 
559 	while (1) {
560 		msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
561 		if (msg_hdr == NULL)
562 			break;
563 
564 		partid = msg_hdr->partid;
565 		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
566 			dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
567 				"received invalid partid=0x%x in message\n",
568 				partid);
569 		} else {
570 			part = &xpc_partitions[partid];
571 
572 			part_referenced = xpc_part_ref(part);
573 			xpc_handle_activate_mq_msg_uv(part, msg_hdr,
574 						      part_referenced,
575 						      &wakeup_hb_checker);
576 			if (part_referenced)
577 				xpc_part_deref(part);
578 		}
579 
580 		gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
581 	}
582 
583 	if (wakeup_hb_checker)
584 		wake_up_interruptible(&xpc_activate_IRQ_wq);
585 
586 	return IRQ_HANDLED;
587 }
588 
589 static enum xp_retval
590 xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
591 				unsigned long gru_mq_desc_gpa)
592 {
593 	enum xp_retval ret;
594 
595 	ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
596 			       sizeof(struct gru_message_queue_desc));
597 	if (ret == xpSuccess)
598 		gru_mq_desc->mq = NULL;
599 
600 	return ret;
601 }
602 
603 static enum xp_retval
604 xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
605 			 int msg_type)
606 {
607 	struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
608 	struct xpc_partition_uv *part_uv = &part->sn.uv;
609 	struct gru_message_queue_desc *gru_mq_desc;
610 	unsigned long irq_flags;
611 	enum xp_retval ret;
612 
613 	DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
614 
615 	msg_hdr->type = msg_type;
616 	msg_hdr->partid = xp_partition_id;
617 	msg_hdr->act_state = part->act_state;
618 	msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
619 
620 	mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
621 again:
622 	if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
623 		gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
624 		if (gru_mq_desc == NULL) {
625 			gru_mq_desc = kmalloc_obj(struct gru_message_queue_desc,
626 						  GFP_ATOMIC);
627 			if (gru_mq_desc == NULL) {
628 				ret = xpNoMemory;
629 				goto done;
630 			}
631 			part_uv->cached_activate_gru_mq_desc = gru_mq_desc;
632 		}
633 
634 		ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc,
635 						      part_uv->
636 						      activate_gru_mq_desc_gpa);
637 		if (ret != xpSuccess)
638 			goto done;
639 
640 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
641 		part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
642 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
643 	}
644 
645 	/* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
646 	ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg,
647 			       msg_size);
648 	if (ret != xpSuccess) {
649 		smp_rmb();	/* ensure a fresh copy of part_uv->flags */
650 		if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV))
651 			goto again;
652 	}
653 done:
654 	mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex);
655 	return ret;
656 }
657 
658 static void
659 xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
660 			      size_t msg_size, int msg_type)
661 {
662 	enum xp_retval ret;
663 
664 	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
665 	if (unlikely(ret != xpSuccess))
666 		XPC_DEACTIVATE_PARTITION(part, ret);
667 }
668 
669 static void
670 xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
671 			 void *msg, size_t msg_size, int msg_type)
672 {
673 	struct xpc_partition *part = &xpc_partitions[ch->partid];
674 	enum xp_retval ret;
675 
676 	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
677 	if (unlikely(ret != xpSuccess)) {
678 		if (irq_flags != NULL)
679 			spin_unlock_irqrestore(&ch->lock, *irq_flags);
680 
681 		XPC_DEACTIVATE_PARTITION(part, ret);
682 
683 		if (irq_flags != NULL)
684 			spin_lock_irqsave(&ch->lock, *irq_flags);
685 	}
686 }
687 
688 static void
689 xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
690 {
691 	unsigned long irq_flags;
692 	struct xpc_partition_uv *part_uv = &part->sn.uv;
693 
694 	/*
695 	 * !!! Make our side think that the remote partition sent an activate
696 	 * !!! mq message our way by doing what the activate IRQ handler would
697 	 * !!! do had one really been sent.
698 	 */
699 
700 	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
701 	if (part_uv->act_state_req == 0)
702 		xpc_activate_IRQ_rcvd++;
703 	part_uv->act_state_req = act_state_req;
704 	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
705 
706 	wake_up_interruptible(&xpc_activate_IRQ_wq);
707 }
708 
709 static enum xp_retval
710 xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
711 				  size_t *len)
712 {
713 	s64 status;
714 	enum xp_retval ret;
715 
716 	status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
717 					  (u64 *)len);
718 	if (status == BIOS_STATUS_SUCCESS)
719 		ret = xpSuccess;
720 	else if (status == BIOS_STATUS_MORE_PASSES)
721 		ret = xpNeedMoreInfo;
722 	else
723 		ret = xpBiosError;
724 
725 	return ret;
726 }
727 
728 static int
729 xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp)
730 {
731 	xpc_heartbeat_uv =
732 	    &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
733 	rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
734 	rp->sn.uv.activate_gru_mq_desc_gpa =
735 	    uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
736 	return 0;
737 }
738 
/*
 * Allow heartbeats from partition @partid. Intentionally empty in the uv
 * implementation.
 */
static void
xpc_allow_hb_uv(short partid)
{
	/* nothing needs to be done */
}
743 
/*
 * Disallow heartbeats from partition @partid. Intentionally empty in the
 * uv implementation.
 */
static void
xpc_disallow_hb_uv(short partid)
{
	/* nothing needs to be done */
}
748 
/*
 * Disallow heartbeats from all partitions. Intentionally empty in the uv
 * implementation.
 */
static void
xpc_disallow_all_hbs_uv(void)
{
	/* nothing needs to be done */
}
753 
754 static void
755 xpc_increment_heartbeat_uv(void)
756 {
757 	xpc_heartbeat_uv->value++;
758 }
759 
760 static void
761 xpc_offline_heartbeat_uv(void)
762 {
763 	xpc_increment_heartbeat_uv();
764 	xpc_heartbeat_uv->offline = 1;
765 }
766 
767 static void
768 xpc_online_heartbeat_uv(void)
769 {
770 	xpc_increment_heartbeat_uv();
771 	xpc_heartbeat_uv->offline = 0;
772 }
773 
774 static void
775 xpc_heartbeat_init_uv(void)
776 {
777 	xpc_heartbeat_uv->value = 1;
778 	xpc_heartbeat_uv->offline = 0;
779 }
780 
/*
 * Heartbeat shutdown: simply take the local heartbeat offline.
 */
static void
xpc_heartbeat_exit_uv(void)
{
	/* shutting down is the same as going offline */
	xpc_offline_heartbeat_uv();
}
786 
787 static enum xp_retval
788 xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
789 {
790 	struct xpc_partition_uv *part_uv = &part->sn.uv;
791 	enum xp_retval ret;
792 
793 	ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
794 			       part_uv->heartbeat_gpa,
795 			       sizeof(struct xpc_heartbeat_uv));
796 	if (ret != xpSuccess)
797 		return ret;
798 
799 	if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
800 	    !part_uv->cached_heartbeat.offline) {
801 
802 		ret = xpNoHeartbeat;
803 	} else {
804 		part->last_heartbeat = part_uv->cached_heartbeat.value;
805 	}
806 	return ret;
807 }
808 
809 static void
810 xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
811 				    unsigned long remote_rp_gpa, int nasid)
812 {
813 	short partid = remote_rp->SAL_partid;
814 	struct xpc_partition *part = &xpc_partitions[partid];
815 	struct xpc_activate_mq_msg_activate_req_uv msg;
816 
817 	part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
818 	part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
819 	part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
820 	part->sn.uv.activate_gru_mq_desc_gpa =
821 	    remote_rp->sn.uv.activate_gru_mq_desc_gpa;
822 
823 	/*
824 	 * ??? Is it a good idea to make this conditional on what is
825 	 * ??? potentially stale state information?
826 	 */
827 	if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
828 		msg.rp_gpa = uv_gpa(xpc_rsvd_page);
829 		msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
830 		msg.activate_gru_mq_desc_gpa =
831 		    xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
832 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
833 					   XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
834 	}
835 
836 	if (part->act_state == XPC_P_AS_INACTIVE)
837 		xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
838 }
839 
840 static void
841 xpc_request_partition_reactivation_uv(struct xpc_partition *part)
842 {
843 	xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
844 }
845 
846 static void
847 xpc_request_partition_deactivation_uv(struct xpc_partition *part)
848 {
849 	struct xpc_activate_mq_msg_deactivate_req_uv msg;
850 
851 	/*
852 	 * ??? Is it a good idea to make this conditional on what is
853 	 * ??? potentially stale state information?
854 	 */
855 	if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
856 	    part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
857 
858 		msg.reason = part->reason;
859 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
860 					 XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
861 	}
862 }
863 
/*
 * Cancel a pending deactivation request for @part. Intentionally empty in
 * the uv implementation.
 */
static void
xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
{
	/* nothing needs to be done */
}
870 
871 static void
872 xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
873 {
874 	head->first = NULL;
875 	head->last = NULL;
876 	spin_lock_init(&head->lock);
877 	head->n_entries = 0;
878 }
879 
880 static void *
881 xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
882 {
883 	unsigned long irq_flags;
884 	struct xpc_fifo_entry_uv *first;
885 
886 	spin_lock_irqsave(&head->lock, irq_flags);
887 	first = head->first;
888 	if (head->first != NULL) {
889 		head->first = first->next;
890 		if (head->first == NULL)
891 			head->last = NULL;
892 
893 		head->n_entries--;
894 		BUG_ON(head->n_entries < 0);
895 
896 		first->next = NULL;
897 	}
898 	spin_unlock_irqrestore(&head->lock, irq_flags);
899 	return first;
900 }
901 
902 static void
903 xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
904 		      struct xpc_fifo_entry_uv *last)
905 {
906 	unsigned long irq_flags;
907 
908 	last->next = NULL;
909 	spin_lock_irqsave(&head->lock, irq_flags);
910 	if (head->last != NULL)
911 		head->last->next = last;
912 	else
913 		head->first = last;
914 	head->last = last;
915 	head->n_entries++;
916 	spin_unlock_irqrestore(&head->lock, irq_flags);
917 }
918 
919 static int
920 xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
921 {
922 	return head->n_entries;
923 }
924 
925 /*
926  * Setup the channel structures that are uv specific.
927  */
928 static enum xp_retval
929 xpc_setup_ch_structures_uv(struct xpc_partition *part)
930 {
931 	struct xpc_channel_uv *ch_uv;
932 	int ch_number;
933 
934 	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
935 		ch_uv = &part->channels[ch_number].sn.uv;
936 
937 		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
938 		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
939 	}
940 
941 	return xpSuccess;
942 }
943 
/*
 * Tear down the uv-specific channel structures of @part. Intentionally
 * empty in the uv implementation.
 */
static void
xpc_teardown_ch_structures_uv(struct xpc_partition *part)
{
	/* nothing needs to be done */
}
953 
954 static enum xp_retval
955 xpc_make_first_contact_uv(struct xpc_partition *part)
956 {
957 	struct xpc_activate_mq_msg_uv msg;
958 
959 	/*
960 	 * We send a sync msg to get the remote partition's remote_act_state
961 	 * updated to our current act_state which at this point should
962 	 * be XPC_P_AS_ACTIVATING.
963 	 */
964 	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
965 				      XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
966 
967 	while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) ||
968 		 (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) {
969 
970 		dev_dbg(xpc_part, "waiting to make first contact with "
971 			"partition %d\n", XPC_PARTID(part));
972 
973 		/* wait a 1/4 of a second or so */
974 		(void)msleep_interruptible(250);
975 
976 		if (part->act_state == XPC_P_AS_DEACTIVATING)
977 			return part->reason;
978 	}
979 
980 	return xpSuccess;
981 }
982 
983 static u64
984 xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
985 {
986 	unsigned long irq_flags;
987 	union xpc_channel_ctl_flags chctl;
988 
989 	spin_lock_irqsave(&part->chctl_lock, irq_flags);
990 	chctl = part->chctl;
991 	if (chctl.all_flags != 0)
992 		part->chctl.all_flags = 0;
993 
994 	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
995 	return chctl.all_flags;
996 }
997 
998 static enum xp_retval
999 xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
1000 {
1001 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1002 	struct xpc_send_msg_slot_uv *msg_slot;
1003 	unsigned long irq_flags;
1004 	int nentries;
1005 	int entry;
1006 	size_t nbytes;
1007 
1008 	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
1009 		nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
1010 		ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1011 		if (ch_uv->send_msg_slots == NULL)
1012 			continue;
1013 
1014 		for (entry = 0; entry < nentries; entry++) {
1015 			msg_slot = &ch_uv->send_msg_slots[entry];
1016 
1017 			msg_slot->msg_slot_number = entry;
1018 			xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
1019 					      &msg_slot->next);
1020 		}
1021 
1022 		spin_lock_irqsave(&ch->lock, irq_flags);
1023 		if (nentries < ch->local_nentries)
1024 			ch->local_nentries = nentries;
1025 		spin_unlock_irqrestore(&ch->lock, irq_flags);
1026 		return xpSuccess;
1027 	}
1028 
1029 	return xpNoMemory;
1030 }
1031 
1032 static enum xp_retval
1033 xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
1034 {
1035 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1036 	struct xpc_notify_mq_msg_uv *msg_slot;
1037 	unsigned long irq_flags;
1038 	int nentries;
1039 	int entry;
1040 	size_t nbytes;
1041 
1042 	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
1043 		nbytes = nentries * ch->entry_size;
1044 		ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1045 		if (ch_uv->recv_msg_slots == NULL)
1046 			continue;
1047 
1048 		for (entry = 0; entry < nentries; entry++) {
1049 			msg_slot = ch_uv->recv_msg_slots +
1050 			    entry * ch->entry_size;
1051 
1052 			msg_slot->hdr.msg_slot_number = entry;
1053 		}
1054 
1055 		spin_lock_irqsave(&ch->lock, irq_flags);
1056 		if (nentries < ch->remote_nentries)
1057 			ch->remote_nentries = nentries;
1058 		spin_unlock_irqrestore(&ch->lock, irq_flags);
1059 		return xpSuccess;
1060 	}
1061 
1062 	return xpNoMemory;
1063 }
1064 
1065 /*
1066  * Allocate msg_slots associated with the channel.
1067  */
1068 static enum xp_retval
1069 xpc_setup_msg_structures_uv(struct xpc_channel *ch)
1070 {
1071 	static enum xp_retval ret;
1072 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1073 
1074 	DBUG_ON(ch->flags & XPC_C_SETUP);
1075 
1076 	ch_uv->cached_notify_gru_mq_desc = kmalloc_obj(struct gru_message_queue_desc,
1077 						       GFP_KERNEL);
1078 	if (ch_uv->cached_notify_gru_mq_desc == NULL)
1079 		return xpNoMemory;
1080 
1081 	ret = xpc_allocate_send_msg_slot_uv(ch);
1082 	if (ret == xpSuccess) {
1083 
1084 		ret = xpc_allocate_recv_msg_slot_uv(ch);
1085 		if (ret != xpSuccess) {
1086 			kfree(ch_uv->send_msg_slots);
1087 			xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1088 		}
1089 	}
1090 	return ret;
1091 }
1092 
1093 /*
1094  * Free up msg_slots and clear other stuff that were setup for the specified
1095  * channel.
1096  */
1097 static void
1098 xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
1099 {
1100 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1101 
1102 	lockdep_assert_held(&ch->lock);
1103 
1104 	kfree(ch_uv->cached_notify_gru_mq_desc);
1105 	ch_uv->cached_notify_gru_mq_desc = NULL;
1106 
1107 	if (ch->flags & XPC_C_SETUP) {
1108 		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1109 		kfree(ch_uv->send_msg_slots);
1110 		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
1111 		kfree(ch_uv->recv_msg_slots);
1112 	}
1113 }
1114 
1115 static void
1116 xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1117 {
1118 	struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
1119 
1120 	msg.ch_number = ch->number;
1121 	msg.reason = ch->reason;
1122 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1123 				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
1124 }
1125 
1126 static void
1127 xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1128 {
1129 	struct xpc_activate_mq_msg_chctl_closereply_uv msg;
1130 
1131 	msg.ch_number = ch->number;
1132 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1133 				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
1134 }
1135 
1136 static void
1137 xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1138 {
1139 	struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
1140 
1141 	msg.ch_number = ch->number;
1142 	msg.entry_size = ch->entry_size;
1143 	msg.local_nentries = ch->local_nentries;
1144 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1145 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
1146 }
1147 
1148 static void
1149 xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1150 {
1151 	struct xpc_activate_mq_msg_chctl_openreply_uv msg;
1152 
1153 	msg.ch_number = ch->number;
1154 	msg.local_nentries = ch->local_nentries;
1155 	msg.remote_nentries = ch->remote_nentries;
1156 	msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc);
1157 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1158 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
1159 }
1160 
1161 static void
1162 xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1163 {
1164 	struct xpc_activate_mq_msg_chctl_opencomplete_uv msg;
1165 
1166 	msg.ch_number = ch->number;
1167 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1168 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV);
1169 }
1170 
1171 static void
1172 xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
1173 {
1174 	unsigned long irq_flags;
1175 
1176 	spin_lock_irqsave(&part->chctl_lock, irq_flags);
1177 	part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
1178 	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
1179 
1180 	xpc_wakeup_channel_mgr(part);
1181 }
1182 
1183 static enum xp_retval
1184 xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
1185 			       unsigned long gru_mq_desc_gpa)
1186 {
1187 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1188 
1189 	DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL);
1190 	return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc,
1191 					       gru_mq_desc_gpa);
1192 }
1193 
1194 static void
1195 xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
1196 {
1197 	struct xpc_activate_mq_msg_uv msg;
1198 
1199 	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1200 				      XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
1201 }
1202 
1203 static void
1204 xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
1205 {
1206 	struct xpc_activate_mq_msg_uv msg;
1207 
1208 	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1209 				      XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
1210 }
1211 
1212 static void
1213 xpc_assume_partition_disengaged_uv(short partid)
1214 {
1215 	struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
1216 	unsigned long irq_flags;
1217 
1218 	spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
1219 	part_uv->flags &= ~XPC_P_ENGAGED_UV;
1220 	spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
1221 }
1222 
1223 static int
1224 xpc_partition_engaged_uv(short partid)
1225 {
1226 	return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
1227 }
1228 
1229 static int
1230 xpc_any_partition_engaged_uv(void)
1231 {
1232 	struct xpc_partition_uv *part_uv;
1233 	short partid;
1234 
1235 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
1236 		part_uv = &xpc_partitions[partid].sn.uv;
1237 		if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
1238 			return 1;
1239 	}
1240 	return 0;
1241 }
1242 
1243 static enum xp_retval
1244 xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
1245 			 struct xpc_send_msg_slot_uv **address_of_msg_slot)
1246 {
1247 	enum xp_retval ret;
1248 	struct xpc_send_msg_slot_uv *msg_slot;
1249 	struct xpc_fifo_entry_uv *entry;
1250 
1251 	while (1) {
1252 		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
1253 		if (entry != NULL)
1254 			break;
1255 
1256 		if (flags & XPC_NOWAIT)
1257 			return xpNoWait;
1258 
1259 		ret = xpc_allocate_msg_wait(ch);
1260 		if (ret != xpInterrupted && ret != xpTimeout)
1261 			return ret;
1262 	}
1263 
1264 	msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
1265 	*address_of_msg_slot = msg_slot;
1266 	return xpSuccess;
1267 }
1268 
1269 static void
1270 xpc_free_msg_slot_uv(struct xpc_channel *ch,
1271 		     struct xpc_send_msg_slot_uv *msg_slot)
1272 {
1273 	xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
1274 
1275 	/* wakeup anyone waiting for a free msg slot */
1276 	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1277 		wake_up(&ch->msg_allocate_wq);
1278 }
1279 
1280 static void
1281 xpc_notify_sender_uv(struct xpc_channel *ch,
1282 		     struct xpc_send_msg_slot_uv *msg_slot,
1283 		     enum xp_retval reason)
1284 {
1285 	xpc_notify_func func = msg_slot->func;
1286 
1287 	if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
1288 
1289 		atomic_dec(&ch->n_to_notify);
1290 
1291 		dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
1292 			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1293 			msg_slot->msg_slot_number, ch->partid, ch->number);
1294 
1295 		func(reason, ch->partid, ch->number, msg_slot->key);
1296 
1297 		dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
1298 			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1299 			msg_slot->msg_slot_number, ch->partid, ch->number);
1300 	}
1301 }
1302 
1303 static void
1304 xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
1305 			    struct xpc_notify_mq_msg_uv *msg)
1306 {
1307 	struct xpc_send_msg_slot_uv *msg_slot;
1308 	int entry = msg->hdr.msg_slot_number % ch->local_nentries;
1309 
1310 	msg_slot = &ch->sn.uv.send_msg_slots[entry];
1311 
1312 	BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
1313 	msg_slot->msg_slot_number += ch->local_nentries;
1314 
1315 	if (msg_slot->func != NULL)
1316 		xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
1317 
1318 	xpc_free_msg_slot_uv(ch, msg_slot);
1319 }
1320 
1321 static void
1322 xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
1323 			    struct xpc_notify_mq_msg_uv *msg)
1324 {
1325 	struct xpc_partition_uv *part_uv = &part->sn.uv;
1326 	struct xpc_channel *ch;
1327 	struct xpc_channel_uv *ch_uv;
1328 	struct xpc_notify_mq_msg_uv *msg_slot;
1329 	unsigned long irq_flags;
1330 	int ch_number = msg->hdr.ch_number;
1331 
1332 	if (unlikely(ch_number >= part->nchannels)) {
1333 		dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
1334 			"channel number=0x%x in message from partid=%d\n",
1335 			ch_number, XPC_PARTID(part));
1336 
1337 		/* get hb checker to deactivate from the remote partition */
1338 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1339 		if (part_uv->act_state_req == 0)
1340 			xpc_activate_IRQ_rcvd++;
1341 		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
1342 		part_uv->reason = xpBadChannelNumber;
1343 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1344 
1345 		wake_up_interruptible(&xpc_activate_IRQ_wq);
1346 		return;
1347 	}
1348 
1349 	ch = &part->channels[ch_number];
1350 	xpc_msgqueue_ref(ch);
1351 
1352 	if (!(ch->flags & XPC_C_CONNECTED)) {
1353 		xpc_msgqueue_deref(ch);
1354 		return;
1355 	}
1356 
1357 	/* see if we're really dealing with an ACK for a previously sent msg */
1358 	if (msg->hdr.size == 0) {
1359 		xpc_handle_notify_mq_ack_uv(ch, msg);
1360 		xpc_msgqueue_deref(ch);
1361 		return;
1362 	}
1363 
1364 	/* we're dealing with a normal message sent via the notify_mq */
1365 	ch_uv = &ch->sn.uv;
1366 
1367 	msg_slot = ch_uv->recv_msg_slots +
1368 	    (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size;
1369 
1370 	BUG_ON(msg_slot->hdr.size != 0);
1371 
1372 	memcpy(msg_slot, msg, msg->hdr.size);
1373 
1374 	xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
1375 
1376 	if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
1377 		/*
1378 		 * If there is an existing idle kthread get it to deliver
1379 		 * the payload, otherwise we'll have to get the channel mgr
1380 		 * for this partition to create a kthread to do the delivery.
1381 		 */
1382 		if (atomic_read(&ch->kthreads_idle) > 0)
1383 			wake_up_nr(&ch->idle_wq, 1);
1384 		else
1385 			xpc_send_chctl_local_msgrequest_uv(part, ch->number);
1386 	}
1387 	xpc_msgqueue_deref(ch);
1388 }
1389 
1390 static irqreturn_t
1391 xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
1392 {
1393 	struct xpc_notify_mq_msg_uv *msg;
1394 	short partid;
1395 	struct xpc_partition *part;
1396 
1397 	while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) !=
1398 	       NULL) {
1399 
1400 		partid = msg->hdr.partid;
1401 		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
1402 			dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
1403 				"invalid partid=0x%x in message\n", partid);
1404 		} else {
1405 			part = &xpc_partitions[partid];
1406 
1407 			if (xpc_part_ref(part)) {
1408 				xpc_handle_notify_mq_msg_uv(part, msg);
1409 				xpc_part_deref(part);
1410 			}
1411 		}
1412 
1413 		gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg);
1414 	}
1415 
1416 	return IRQ_HANDLED;
1417 }
1418 
1419 static int
1420 xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
1421 {
1422 	return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
1423 }
1424 
1425 static void
1426 xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
1427 {
1428 	struct xpc_channel *ch = &part->channels[ch_number];
1429 	int ndeliverable_payloads;
1430 
1431 	xpc_msgqueue_ref(ch);
1432 
1433 	ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
1434 
1435 	if (ndeliverable_payloads > 0 &&
1436 	    (ch->flags & XPC_C_CONNECTED) &&
1437 	    (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
1438 
1439 		xpc_activate_kthreads(ch, ndeliverable_payloads);
1440 	}
1441 
1442 	xpc_msgqueue_deref(ch);
1443 }
1444 
1445 static enum xp_retval
1446 xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
1447 		    u16 payload_size, u8 notify_type, xpc_notify_func func,
1448 		    void *key)
1449 {
1450 	enum xp_retval ret = xpSuccess;
1451 	struct xpc_send_msg_slot_uv *msg_slot = NULL;
1452 	struct xpc_notify_mq_msg_uv *msg;
1453 	u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
1454 	size_t msg_size;
1455 
1456 	DBUG_ON(notify_type != XPC_N_CALL);
1457 
1458 	msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
1459 	if (msg_size > ch->entry_size)
1460 		return xpPayloadTooBig;
1461 
1462 	xpc_msgqueue_ref(ch);
1463 
1464 	if (ch->flags & XPC_C_DISCONNECTING) {
1465 		ret = ch->reason;
1466 		goto out_1;
1467 	}
1468 	if (!(ch->flags & XPC_C_CONNECTED)) {
1469 		ret = xpNotConnected;
1470 		goto out_1;
1471 	}
1472 
1473 	ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
1474 	if (ret != xpSuccess)
1475 		goto out_1;
1476 
1477 	if (func != NULL) {
1478 		atomic_inc(&ch->n_to_notify);
1479 
1480 		msg_slot->key = key;
1481 		smp_wmb(); /* a non-NULL func must hit memory after the key */
1482 		msg_slot->func = func;
1483 
1484 		if (ch->flags & XPC_C_DISCONNECTING) {
1485 			ret = ch->reason;
1486 			goto out_2;
1487 		}
1488 	}
1489 
1490 	msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
1491 	msg->hdr.partid = xp_partition_id;
1492 	msg->hdr.ch_number = ch->number;
1493 	msg->hdr.size = msg_size;
1494 	msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
1495 	memcpy(&msg->payload, payload, payload_size);
1496 
1497 	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1498 			       msg_size);
1499 	if (ret == xpSuccess)
1500 		goto out_1;
1501 
1502 	XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1503 out_2:
1504 	if (func != NULL) {
1505 		/*
1506 		 * Try to NULL the msg_slot's func field. If we fail, then
1507 		 * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
1508 		 * case we need to pretend we succeeded to send the message
1509 		 * since the user will get a callout for the disconnect error
1510 		 * by xpc_notify_senders_of_disconnect_uv(), and to also get an
1511 		 * error returned here will confuse them. Additionally, since
1512 		 * in this case the channel is being disconnected we don't need
1513 		 * to put the msg_slot back on the free list.
1514 		 */
1515 		if (cmpxchg(&msg_slot->func, func, NULL) != func) {
1516 			ret = xpSuccess;
1517 			goto out_1;
1518 		}
1519 
1520 		msg_slot->key = NULL;
1521 		atomic_dec(&ch->n_to_notify);
1522 	}
1523 	xpc_free_msg_slot_uv(ch, msg_slot);
1524 out_1:
1525 	xpc_msgqueue_deref(ch);
1526 	return ret;
1527 }
1528 
1529 /*
1530  * Tell the callers of xpc_send_notify() that the status of their payloads
1531  * is unknown because the channel is now disconnecting.
1532  *
1533  * We don't worry about putting these msg_slots on the free list since the
1534  * msg_slots themselves are about to be kfree'd.
1535  */
1536 static void
1537 xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
1538 {
1539 	struct xpc_send_msg_slot_uv *msg_slot;
1540 	int entry;
1541 
1542 	DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
1543 
1544 	for (entry = 0; entry < ch->local_nentries; entry++) {
1545 
1546 		if (atomic_read(&ch->n_to_notify) == 0)
1547 			break;
1548 
1549 		msg_slot = &ch->sn.uv.send_msg_slots[entry];
1550 		if (msg_slot->func != NULL)
1551 			xpc_notify_sender_uv(ch, msg_slot, ch->reason);
1552 	}
1553 }
1554 
1555 /*
1556  * Get the next deliverable message's payload.
1557  */
1558 static void *
1559 xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
1560 {
1561 	struct xpc_fifo_entry_uv *entry;
1562 	struct xpc_notify_mq_msg_uv *msg;
1563 	void *payload = NULL;
1564 
1565 	if (!(ch->flags & XPC_C_DISCONNECTING)) {
1566 		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
1567 		if (entry != NULL) {
1568 			msg = container_of(entry, struct xpc_notify_mq_msg_uv,
1569 					   hdr.u.next);
1570 			payload = &msg->payload;
1571 		}
1572 	}
1573 	return payload;
1574 }
1575 
1576 static void
1577 xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
1578 {
1579 	struct xpc_notify_mq_msg_uv *msg;
1580 	enum xp_retval ret;
1581 
1582 	msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
1583 
1584 	/* return an ACK to the sender of this message */
1585 
1586 	msg->hdr.partid = xp_partition_id;
1587 	msg->hdr.size = 0;	/* size of zero indicates this is an ACK */
1588 
1589 	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1590 			       sizeof(struct xpc_notify_mq_msghdr_uv));
1591 	if (ret != xpSuccess)
1592 		XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1593 }
1594 
1595 static const struct xpc_arch_operations xpc_arch_ops_uv = {
1596 	.setup_partitions = xpc_setup_partitions_uv,
1597 	.teardown_partitions = xpc_teardown_partitions_uv,
1598 	.process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv,
1599 	.get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv,
1600 	.setup_rsvd_page = xpc_setup_rsvd_page_uv,
1601 
1602 	.allow_hb = xpc_allow_hb_uv,
1603 	.disallow_hb = xpc_disallow_hb_uv,
1604 	.disallow_all_hbs = xpc_disallow_all_hbs_uv,
1605 	.increment_heartbeat = xpc_increment_heartbeat_uv,
1606 	.offline_heartbeat = xpc_offline_heartbeat_uv,
1607 	.online_heartbeat = xpc_online_heartbeat_uv,
1608 	.heartbeat_init = xpc_heartbeat_init_uv,
1609 	.heartbeat_exit = xpc_heartbeat_exit_uv,
1610 	.get_remote_heartbeat = xpc_get_remote_heartbeat_uv,
1611 
1612 	.request_partition_activation =
1613 		xpc_request_partition_activation_uv,
1614 	.request_partition_reactivation =
1615 		xpc_request_partition_reactivation_uv,
1616 	.request_partition_deactivation =
1617 		xpc_request_partition_deactivation_uv,
1618 	.cancel_partition_deactivation_request =
1619 		xpc_cancel_partition_deactivation_request_uv,
1620 
1621 	.setup_ch_structures = xpc_setup_ch_structures_uv,
1622 	.teardown_ch_structures = xpc_teardown_ch_structures_uv,
1623 
1624 	.make_first_contact = xpc_make_first_contact_uv,
1625 
1626 	.get_chctl_all_flags = xpc_get_chctl_all_flags_uv,
1627 	.send_chctl_closerequest = xpc_send_chctl_closerequest_uv,
1628 	.send_chctl_closereply = xpc_send_chctl_closereply_uv,
1629 	.send_chctl_openrequest = xpc_send_chctl_openrequest_uv,
1630 	.send_chctl_openreply = xpc_send_chctl_openreply_uv,
1631 	.send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv,
1632 	.process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv,
1633 
1634 	.save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv,
1635 
1636 	.setup_msg_structures = xpc_setup_msg_structures_uv,
1637 	.teardown_msg_structures = xpc_teardown_msg_structures_uv,
1638 
1639 	.indicate_partition_engaged = xpc_indicate_partition_engaged_uv,
1640 	.indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv,
1641 	.assume_partition_disengaged = xpc_assume_partition_disengaged_uv,
1642 	.partition_engaged = xpc_partition_engaged_uv,
1643 	.any_partition_engaged = xpc_any_partition_engaged_uv,
1644 
1645 	.n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv,
1646 	.send_payload = xpc_send_payload_uv,
1647 	.get_deliverable_payload = xpc_get_deliverable_payload_uv,
1648 	.received_payload = xpc_received_payload_uv,
1649 	.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
1650 };
1651 
1652 static int
1653 xpc_init_mq_node(int nid)
1654 {
1655 	int cpu;
1656 
1657 	cpus_read_lock();
1658 
1659 	for_each_cpu(cpu, cpumask_of_node(nid)) {
1660 		xpc_activate_mq_uv =
1661 			xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid,
1662 					     XPC_ACTIVATE_IRQ_NAME,
1663 					     xpc_handle_activate_IRQ_uv);
1664 		if (!IS_ERR(xpc_activate_mq_uv))
1665 			break;
1666 	}
1667 	if (IS_ERR(xpc_activate_mq_uv)) {
1668 		cpus_read_unlock();
1669 		return PTR_ERR(xpc_activate_mq_uv);
1670 	}
1671 
1672 	for_each_cpu(cpu, cpumask_of_node(nid)) {
1673 		xpc_notify_mq_uv =
1674 			xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid,
1675 					     XPC_NOTIFY_IRQ_NAME,
1676 					     xpc_handle_notify_IRQ_uv);
1677 		if (!IS_ERR(xpc_notify_mq_uv))
1678 			break;
1679 	}
1680 	if (IS_ERR(xpc_notify_mq_uv)) {
1681 		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1682 		cpus_read_unlock();
1683 		return PTR_ERR(xpc_notify_mq_uv);
1684 	}
1685 
1686 	cpus_read_unlock();
1687 	return 0;
1688 }
1689 
1690 int
1691 xpc_init_uv(void)
1692 {
1693 	int nid;
1694 	int ret = 0;
1695 
1696 	xpc_arch_ops = xpc_arch_ops_uv;
1697 
1698 	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
1699 		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
1700 			XPC_MSG_HDR_MAX_SIZE);
1701 		return -E2BIG;
1702 	}
1703 
1704 	if (xpc_mq_node < 0)
1705 		for_each_online_node(nid) {
1706 			ret = xpc_init_mq_node(nid);
1707 
1708 			if (!ret)
1709 				break;
1710 		}
1711 	else
1712 		ret = xpc_init_mq_node(xpc_mq_node);
1713 
1714 	if (ret < 0)
1715 		dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n",
1716 			-ret);
1717 
1718 	return ret;
1719 }
1720 
/*
 * Undo xpc_init_uv(): destroy the notify mq and then the activate mq, i.e.
 * in the reverse of the order in which xpc_init_mq_node() creates them.
 */
void
xpc_exit_uv(void)
{
	xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
}
1727 
/*
 * xpc_mq_node selects the node used for the message queues. It defaults to
 * NUMA_NO_NODE; xpc_init_uv() treats any negative value as "try each online
 * node in turn". Registered with permissions 0.
 */
module_param(xpc_mq_node, int, 0);
MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");
1730