1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. 7 */ 8 9 /* 10 * Cross Partition Communication (XPC) partition support. 11 * 12 * This is the part of XPC that detects the presence/absence of 13 * other partitions. It provides a heartbeat and monitors the 14 * heartbeats of other partitions. 15 * 16 */ 17 18 #include <linux/device.h> 19 #include <linux/hardirq.h> 20 #include "xpc.h" 21 22 /* XPC is exiting flag */ 23 int xpc_exiting; 24 25 /* this partition's reserved page pointers */ 26 struct xpc_rsvd_page *xpc_rsvd_page; 27 static unsigned long *xpc_part_nasids; 28 unsigned long *xpc_mach_nasids; 29 30 static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */ 31 int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */ 32 33 struct xpc_partition *xpc_partitions; 34 35 /* 36 * Guarantee that the kmalloc'd memory is cacheline aligned. 37 */ 38 void * 39 xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) 40 { 41 /* see if kmalloc will give us cachline aligned memory by default */ 42 *base = kmalloc(size, flags); 43 if (*base == NULL) 44 return NULL; 45 46 if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) 47 return *base; 48 49 kfree(*base); 50 51 /* nope, we'll have to do it ourselves */ 52 *base = kmalloc(size + L1_CACHE_BYTES, flags); 53 if (*base == NULL) 54 return NULL; 55 56 return (void *)L1_CACHE_ALIGN((u64)*base); 57 } 58 59 /* 60 * Given a nasid, get the physical address of the partition's reserved page 61 * for that nasid. This function returns 0 on any error. 62 */ 63 static unsigned long 64 xpc_get_rsvd_page_pa(int nasid) 65 { 66 enum xp_retval ret; 67 u64 cookie = 0; 68 unsigned long rp_pa = nasid; /* seed with nasid */ 69 size_t len = 0; 70 size_t buf_len = 0; 71 void *buf = buf; 72 void *buf_base = NULL; 73 enum xp_retval (*get_partition_rsvd_page_pa) 74 (void *, u64 *, unsigned long *, size_t *) = 75 xpc_arch_ops.get_partition_rsvd_page_pa; 76 77 while (1) { 78 79 /* !!! rp_pa will need to be _gpa on UV. 80 * ??? So do we save it into the architecture specific parts 81 * ??? of the xpc_partition structure? Do we rename this 82 * ??? function or have two versions? Rename rp_pa for UV to 83 * ??? rp_gpa? 84 */ 85 ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len); 86 87 dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " 88 "address=0x%016lx, len=0x%016lx\n", ret, 89 (unsigned long)cookie, rp_pa, len); 90 91 if (ret != xpNeedMoreInfo) 92 break; 93 94 /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */ 95 if (L1_CACHE_ALIGN(len) > buf_len) { 96 kfree(buf_base); 97 buf_len = L1_CACHE_ALIGN(len); 98 buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL, 99 &buf_base); 100 if (buf_base == NULL) { 101 dev_err(xpc_part, "unable to kmalloc " 102 "len=0x%016lx\n", buf_len); 103 ret = xpNoMemory; 104 break; 105 } 106 } 107 108 ret = xp_remote_memcpy(xp_pa(buf), rp_pa, buf_len); 109 if (ret != xpSuccess) { 110 dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret); 111 break; 112 } 113 } 114 115 kfree(buf_base); 116 117 if (ret != xpSuccess) 118 rp_pa = 0; 119 120 dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); 121 return rp_pa; 122 } 123 124 /* 125 * Fill the partition reserved page with the information needed by 126 * other partitions to discover we are alive and establish initial 127 * communications. 128 */ 129 int 130 xpc_setup_rsvd_page(void) 131 { 132 int ret; 133 struct xpc_rsvd_page *rp; 134 unsigned long rp_pa; 135 unsigned long new_ts_jiffies; 136 137 /* get the local reserved page's address */ 138 139 preempt_disable(); 140 rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id())); 141 preempt_enable(); 142 if (rp_pa == 0) { 143 dev_err(xpc_part, "SAL failed to locate the reserved page\n"); 144 return -ESRCH; 145 } 146 rp = (struct xpc_rsvd_page *)__va(rp_pa); 147 148 if (rp->SAL_version < 3) { 149 /* SAL_versions < 3 had a SAL_partid defined as a u8 */ 150 rp->SAL_partid &= 0xff; 151 } 152 BUG_ON(rp->SAL_partid != xp_partition_id); 153 154 if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) { 155 dev_err(xpc_part, "the reserved page's partid of %d is outside " 156 "supported range (< 0 || >= %d)\n", rp->SAL_partid, 157 xp_max_npartitions); 158 return -EINVAL; 159 } 160 161 rp->version = XPC_RP_VERSION; 162 rp->max_npartitions = xp_max_npartitions; 163 164 /* establish the actual sizes of the nasid masks */ 165 if (rp->SAL_version == 1) { 166 /* SAL_version 1 didn't set the nasids_size field */ 167 rp->SAL_nasids_size = 128; 168 } 169 xpc_nasid_mask_nbytes = rp->SAL_nasids_size; 170 xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size * 171 BITS_PER_BYTE); 172 173 /* setup the pointers to the various items in the reserved page */ 174 xpc_part_nasids = XPC_RP_PART_NASIDS(rp); 175 xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); 176 177 ret = xpc_arch_ops.setup_rsvd_page(rp); 178 if (ret != 0) 179 return ret; 180 181 /* 182 * Set timestamp of when reserved page was setup by XPC. 183 * This signifies to the remote partition that our reserved 184 * page is initialized. 185 */ 186 new_ts_jiffies = jiffies; 187 if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies) 188 new_ts_jiffies++; 189 rp->ts_jiffies = new_ts_jiffies; 190 191 xpc_rsvd_page = rp; 192 return 0; 193 } 194 195 void 196 xpc_teardown_rsvd_page(void) 197 { 198 /* a zero timestamp indicates our rsvd page is not initialized */ 199 xpc_rsvd_page->ts_jiffies = 0; 200 } 201 202 /* 203 * Get a copy of a portion of the remote partition's rsvd page. 204 * 205 * remote_rp points to a buffer that is cacheline aligned for BTE copies and 206 * is large enough to contain a copy of their reserved page header and 207 * part_nasids mask. 208 */ 209 enum xp_retval 210 xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids, 211 struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa) 212 { 213 int l; 214 enum xp_retval ret; 215 216 /* get the reserved page's physical address */ 217 218 *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); 219 if (*remote_rp_pa == 0) 220 return xpNoRsvdPageAddr; 221 222 /* pull over the reserved page header and part_nasids mask */ 223 ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa, 224 XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes); 225 if (ret != xpSuccess) 226 return ret; 227 228 if (discovered_nasids != NULL) { 229 unsigned long *remote_part_nasids = 230 XPC_RP_PART_NASIDS(remote_rp); 231 232 for (l = 0; l < xpc_nasid_mask_nlongs; l++) 233 discovered_nasids[l] |= remote_part_nasids[l]; 234 } 235 236 /* zero timestamp indicates the reserved page has not been setup */ 237 if (remote_rp->ts_jiffies == 0) 238 return xpRsvdPageNotSet; 239 240 if (XPC_VERSION_MAJOR(remote_rp->version) != 241 XPC_VERSION_MAJOR(XPC_RP_VERSION)) { 242 return xpBadVersion; 243 } 244 245 /* check that both remote and local partids are valid for each side */ 246 if (remote_rp->SAL_partid < 0 || 247 remote_rp->SAL_partid >= xp_max_npartitions || 248 remote_rp->max_npartitions <= xp_partition_id) { 249 return xpInvalidPartid; 250 } 251 252 if (remote_rp->SAL_partid == xp_partition_id) 253 return xpLocalPartid; 254 255 return xpSuccess; 256 } 257 258 /* 259 * See if the other side has responded to a partition deactivate request 260 * from us. Though we requested the remote partition to deactivate with regard 261 * to us, we really only need to wait for the other side to disengage from us. 262 */ 263 int 264 xpc_partition_disengaged(struct xpc_partition *part) 265 { 266 short partid = XPC_PARTID(part); 267 int disengaged; 268 269 disengaged = !xpc_arch_ops.partition_engaged(partid); 270 if (part->disengage_timeout) { 271 if (!disengaged) { 272 if (time_is_after_jiffies(part->disengage_timeout)) { 273 /* timelimit hasn't been reached yet */ 274 return 0; 275 } 276 277 /* 278 * Other side hasn't responded to our deactivate 279 * request in a timely fashion, so assume it's dead. 280 */ 281 282 dev_info(xpc_part, "deactivate request to remote " 283 "partition %d timed out\n", partid); 284 xpc_disengage_timedout = 1; 285 xpc_arch_ops.assume_partition_disengaged(partid); 286 disengaged = 1; 287 } 288 part->disengage_timeout = 0; 289 290 /* cancel the timer function, provided it's not us */ 291 if (!in_interrupt()) 292 del_singleshot_timer_sync(&part->disengage_timer); 293 294 DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING && 295 part->act_state != XPC_P_AS_INACTIVE); 296 if (part->act_state != XPC_P_AS_INACTIVE) 297 xpc_wakeup_channel_mgr(part); 298 299 xpc_arch_ops.cancel_partition_deactivation_request(part); 300 } 301 return disengaged; 302 } 303 304 /* 305 * Mark specified partition as active. 306 */ 307 enum xp_retval 308 xpc_mark_partition_active(struct xpc_partition *part) 309 { 310 unsigned long irq_flags; 311 enum xp_retval ret; 312 313 dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); 314 315 spin_lock_irqsave(&part->act_lock, irq_flags); 316 if (part->act_state == XPC_P_AS_ACTIVATING) { 317 part->act_state = XPC_P_AS_ACTIVE; 318 ret = xpSuccess; 319 } else { 320 DBUG_ON(part->reason == xpSuccess); 321 ret = part->reason; 322 } 323 spin_unlock_irqrestore(&part->act_lock, irq_flags); 324 325 return ret; 326 } 327 328 /* 329 * Start the process of deactivating the specified partition. 330 */ 331 void 332 xpc_deactivate_partition(const int line, struct xpc_partition *part, 333 enum xp_retval reason) 334 { 335 unsigned long irq_flags; 336 337 spin_lock_irqsave(&part->act_lock, irq_flags); 338 339 if (part->act_state == XPC_P_AS_INACTIVE) { 340 XPC_SET_REASON(part, reason, line); 341 spin_unlock_irqrestore(&part->act_lock, irq_flags); 342 if (reason == xpReactivating) { 343 /* we interrupt ourselves to reactivate partition */ 344 xpc_arch_ops.request_partition_reactivation(part); 345 } 346 return; 347 } 348 if (part->act_state == XPC_P_AS_DEACTIVATING) { 349 if ((part->reason == xpUnloading && reason != xpUnloading) || 350 reason == xpReactivating) { 351 XPC_SET_REASON(part, reason, line); 352 } 353 spin_unlock_irqrestore(&part->act_lock, irq_flags); 354 return; 355 } 356 357 part->act_state = XPC_P_AS_DEACTIVATING; 358 XPC_SET_REASON(part, reason, line); 359 360 spin_unlock_irqrestore(&part->act_lock, irq_flags); 361 362 /* ask remote partition to deactivate with regard to us */ 363 xpc_arch_ops.request_partition_deactivation(part); 364 365 /* set a timelimit on the disengage phase of the deactivation request */ 366 part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); 367 part->disengage_timer.expires = part->disengage_timeout; 368 add_timer(&part->disengage_timer); 369 370 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", 371 XPC_PARTID(part), reason); 372 373 xpc_partition_going_down(part, reason); 374 } 375 376 /* 377 * Mark specified partition as inactive. 378 */ 379 void 380 xpc_mark_partition_inactive(struct xpc_partition *part) 381 { 382 unsigned long irq_flags; 383 384 dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", 385 XPC_PARTID(part)); 386 387 spin_lock_irqsave(&part->act_lock, irq_flags); 388 part->act_state = XPC_P_AS_INACTIVE; 389 spin_unlock_irqrestore(&part->act_lock, irq_flags); 390 part->remote_rp_pa = 0; 391 } 392 393 /* 394 * SAL has provided a partition and machine mask. The partition mask 395 * contains a bit for each even nasid in our partition. The machine 396 * mask contains a bit for each even nasid in the entire machine. 397 * 398 * Using those two bit arrays, we can determine which nasids are 399 * known in the machine. Each should also have a reserved page 400 * initialized if they are available for partitioning. 401 */ 402 void 403 xpc_discovery(void) 404 { 405 void *remote_rp_base; 406 struct xpc_rsvd_page *remote_rp; 407 unsigned long remote_rp_pa; 408 int region; 409 int region_size; 410 int max_regions; 411 int nasid; 412 struct xpc_rsvd_page *rp; 413 unsigned long *discovered_nasids; 414 enum xp_retval ret; 415 416 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + 417 xpc_nasid_mask_nbytes, 418 GFP_KERNEL, &remote_rp_base); 419 if (remote_rp == NULL) 420 return; 421 422 discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs, 423 GFP_KERNEL); 424 if (discovered_nasids == NULL) { 425 kfree(remote_rp_base); 426 return; 427 } 428 429 rp = (struct xpc_rsvd_page *)xpc_rsvd_page; 430 431 /* 432 * The term 'region' in this context refers to the minimum number of 433 * nodes that can comprise an access protection grouping. The access 434 * protection is in regards to memory, IOI and IPI. 435 */ 436 max_regions = 64; 437 region_size = xp_region_size; 438 439 switch (region_size) { 440 case 128: 441 max_regions *= 2; 442 case 64: 443 max_regions *= 2; 444 case 32: 445 max_regions *= 2; 446 region_size = 16; 447 DBUG_ON(!is_shub2()); 448 } 449 450 for (region = 0; region < max_regions; region++) { 451 452 if (xpc_exiting) 453 break; 454 455 dev_dbg(xpc_part, "searching region %d\n", region); 456 457 for (nasid = (region * region_size * 2); 458 nasid < ((region + 1) * region_size * 2); nasid += 2) { 459 460 if (xpc_exiting) 461 break; 462 463 dev_dbg(xpc_part, "checking nasid %d\n", nasid); 464 465 if (test_bit(nasid / 2, xpc_part_nasids)) { 466 dev_dbg(xpc_part, "PROM indicates Nasid %d is " 467 "part of the local partition; skipping " 468 "region\n", nasid); 469 break; 470 } 471 472 if (!(test_bit(nasid / 2, xpc_mach_nasids))) { 473 dev_dbg(xpc_part, "PROM indicates Nasid %d was " 474 "not on Numa-Link network at reset\n", 475 nasid); 476 continue; 477 } 478 479 if (test_bit(nasid / 2, discovered_nasids)) { 480 dev_dbg(xpc_part, "Nasid %d is part of a " 481 "partition which was previously " 482 "discovered\n", nasid); 483 continue; 484 } 485 486 /* pull over the rsvd page header & part_nasids mask */ 487 488 ret = xpc_get_remote_rp(nasid, discovered_nasids, 489 remote_rp, &remote_rp_pa); 490 if (ret != xpSuccess) { 491 dev_dbg(xpc_part, "unable to get reserved page " 492 "from nasid %d, reason=%d\n", nasid, 493 ret); 494 495 if (ret == xpLocalPartid) 496 break; 497 498 continue; 499 } 500 501 xpc_arch_ops.request_partition_activation(remote_rp, 502 remote_rp_pa, nasid); 503 } 504 } 505 506 kfree(discovered_nasids); 507 kfree(remote_rp_base); 508 } 509 510 /* 511 * Given a partid, get the nasids owned by that partition from the 512 * remote partition's reserved page. 513 */ 514 enum xp_retval 515 xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) 516 { 517 struct xpc_partition *part; 518 unsigned long part_nasid_pa; 519 520 part = &xpc_partitions[partid]; 521 if (part->remote_rp_pa == 0) 522 return xpPartitionDown; 523 524 memset(nasid_mask, 0, xpc_nasid_mask_nbytes); 525 526 part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa); 527 528 return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa, 529 xpc_nasid_mask_nbytes); 530 } 531