1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * store hypervisor information instruction emulation functions. 4 * 5 * Copyright IBM Corp. 2016 6 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> 7 */ 8 #include <linux/errno.h> 9 #include <linux/pagemap.h> 10 #include <linux/vmalloc.h> 11 #include <linux/syscalls.h> 12 #include <linux/mutex.h> 13 #include <asm/asm-offsets.h> 14 #include <asm/sclp.h> 15 #include <asm/diag.h> 16 #include <asm/sysinfo.h> 17 #include <asm/ebcdic.h> 18 #include <asm/facility.h> 19 #include <asm/sthyi.h> 20 #include "entry.h" 21 22 #define DED_WEIGHT 0xffff 23 /* 24 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string 25 * as they are justified with spaces. 26 */ 27 #define CP 0xc3d7404040404040UL 28 #define IFL 0xc9c6d34040404040UL 29 30 enum hdr_flags { 31 HDR_NOT_LPAR = 0x10, 32 HDR_STACK_INCM = 0x20, 33 HDR_STSI_UNAV = 0x40, 34 HDR_PERF_UNAV = 0x80, 35 }; 36 37 enum mac_validity { 38 MAC_NAME_VLD = 0x20, 39 MAC_ID_VLD = 0x40, 40 MAC_CNT_VLD = 0x80, 41 }; 42 43 enum par_flag { 44 PAR_MT_EN = 0x80, 45 }; 46 47 enum par_validity { 48 PAR_GRP_VLD = 0x08, 49 PAR_ID_VLD = 0x10, 50 PAR_ABS_VLD = 0x20, 51 PAR_WGHT_VLD = 0x40, 52 PAR_PCNT_VLD = 0x80, 53 }; 54 55 struct hdr_sctn { 56 u8 infhflg1; 57 u8 infhflg2; /* reserved */ 58 u8 infhval1; /* reserved */ 59 u8 infhval2; /* reserved */ 60 u8 reserved[3]; 61 u8 infhygct; 62 u16 infhtotl; 63 u16 infhdln; 64 u16 infmoff; 65 u16 infmlen; 66 u16 infpoff; 67 u16 infplen; 68 u16 infhoff1; 69 u16 infhlen1; 70 u16 infgoff1; 71 u16 infglen1; 72 u16 infhoff2; 73 u16 infhlen2; 74 u16 infgoff2; 75 u16 infglen2; 76 u16 infhoff3; 77 u16 infhlen3; 78 u16 infgoff3; 79 u16 infglen3; 80 u8 reserved2[4]; 81 } __packed; 82 83 struct mac_sctn { 84 u8 infmflg1; /* reserved */ 85 u8 infmflg2; /* reserved */ 86 u8 infmval1; 87 u8 infmval2; /* reserved */ 88 u16 infmscps; 89 u16 infmdcps; 90 u16 infmsifl; 91 u16 infmdifl; 92 char infmname[8]; 93 char infmtype[4]; 94 char infmmanu[16]; 95 char infmseq[16]; 96 char infmpman[4]; 97 u8 reserved[4]; 98 } __packed; 99 100 struct par_sctn { 101 u8 infpflg1; 102 u8 infpflg2; /* reserved */ 103 u8 infpval1; 104 u8 infpval2; /* reserved */ 105 u16 infppnum; 106 u16 infpscps; 107 u16 infpdcps; 108 u16 infpsifl; 109 u16 infpdifl; 110 u16 reserved; 111 char infppnam[8]; 112 u32 infpwbcp; 113 u32 infpabcp; 114 u32 infpwbif; 115 u32 infpabif; 116 char infplgnm[8]; 117 u32 infplgcp; 118 u32 infplgif; 119 } __packed; 120 121 struct sthyi_sctns { 122 struct hdr_sctn hdr; 123 struct mac_sctn mac; 124 struct par_sctn par; 125 } __packed; 126 127 struct cpu_inf { 128 u64 lpar_cap; 129 u64 lpar_grp_cap; 130 u64 lpar_weight; 131 u64 all_weight; 132 int cpu_num_ded; 133 int cpu_num_shd; 134 }; 135 136 struct lpar_cpu_inf { 137 struct cpu_inf cp; 138 struct cpu_inf ifl; 139 }; 140 141 /* 142 * STHYI requires extensive locking in the higher hypervisors 143 * and is very computational/memory expensive. Therefore we 144 * cache the retrieved data whose valid period is 1s. 145 */ 146 #define CACHE_VALID_JIFFIES HZ 147 148 struct sthyi_info { 149 void *info; 150 unsigned long end; 151 }; 152 153 static DEFINE_MUTEX(sthyi_mutex); 154 static struct sthyi_info sthyi_cache; 155 156 static inline u64 cpu_id(u8 ctidx, void *diag224_buf) 157 { 158 return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); 159 } 160 161 /* 162 * Scales the cpu capping from the lpar range to the one expected in 163 * sthyi data. 164 * 165 * diag204 reports a cap in hundredths of processor units. 166 * z/VM's range for one core is 0 - 0x10000. 167 */ 168 static u32 scale_cap(u32 in) 169 { 170 return (0x10000 * in) / 100; 171 } 172 173 static void fill_hdr(struct sthyi_sctns *sctns) 174 { 175 sctns->hdr.infhdln = sizeof(sctns->hdr); 176 sctns->hdr.infmoff = sizeof(sctns->hdr); 177 sctns->hdr.infmlen = sizeof(sctns->mac); 178 sctns->hdr.infplen = sizeof(sctns->par); 179 sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; 180 sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; 181 } 182 183 static void fill_stsi_mac(struct sthyi_sctns *sctns, 184 struct sysinfo_1_1_1 *sysinfo) 185 { 186 sclp_ocf_cpc_name_copy(sctns->mac.infmname); 187 if (*(u64 *)sctns->mac.infmname != 0) 188 sctns->mac.infmval1 |= MAC_NAME_VLD; 189 190 if (stsi(sysinfo, 1, 1, 1)) 191 return; 192 193 memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); 194 memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); 195 memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); 196 memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); 197 198 sctns->mac.infmval1 |= MAC_ID_VLD; 199 } 200 201 static void fill_stsi_par(struct sthyi_sctns *sctns, 202 struct sysinfo_2_2_2 *sysinfo) 203 { 204 if (stsi(sysinfo, 2, 2, 2)) 205 return; 206 207 sctns->par.infppnum = sysinfo->lpar_number; 208 memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); 209 210 sctns->par.infpval1 |= PAR_ID_VLD; 211 } 212 213 static void fill_stsi(struct sthyi_sctns *sctns) 214 { 215 void *sysinfo; 216 217 /* Errors are handled through the validity bits in the response. */ 218 sysinfo = (void *)__get_free_page(GFP_KERNEL); 219 if (!sysinfo) 220 return; 221 222 fill_stsi_mac(sctns, sysinfo); 223 fill_stsi_par(sctns, sysinfo); 224 225 free_pages((unsigned long)sysinfo, 0); 226 } 227 228 static void fill_diag_mac(struct sthyi_sctns *sctns, 229 struct diag204_x_phys_block *block, 230 void *diag224_buf) 231 { 232 int i; 233 234 for (i = 0; i < block->hdr.cpus; i++) { 235 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 236 case CP: 237 if (block->cpus[i].weight == DED_WEIGHT) 238 sctns->mac.infmdcps++; 239 else 240 sctns->mac.infmscps++; 241 break; 242 case IFL: 243 if (block->cpus[i].weight == DED_WEIGHT) 244 sctns->mac.infmdifl++; 245 else 246 sctns->mac.infmsifl++; 247 break; 248 } 249 } 250 sctns->mac.infmval1 |= MAC_CNT_VLD; 251 } 252 253 /* Returns a pointer to the the next partition block. */ 254 static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, 255 bool this_lpar, 256 void *diag224_buf, 257 struct diag204_x_part_block *block) 258 { 259 int i, capped = 0, weight_cp = 0, weight_ifl = 0; 260 struct cpu_inf *cpu_inf; 261 262 for (i = 0; i < block->hdr.rcpus; i++) { 263 if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) 264 continue; 265 266 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 267 case CP: 268 cpu_inf = &part_inf->cp; 269 if (block->cpus[i].cur_weight < DED_WEIGHT) 270 weight_cp |= block->cpus[i].cur_weight; 271 break; 272 case IFL: 273 cpu_inf = &part_inf->ifl; 274 if (block->cpus[i].cur_weight < DED_WEIGHT) 275 weight_ifl |= block->cpus[i].cur_weight; 276 break; 277 default: 278 continue; 279 } 280 281 if (!this_lpar) 282 continue; 283 284 capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; 285 cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; 286 cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; 287 288 if (block->cpus[i].weight == DED_WEIGHT) 289 cpu_inf->cpu_num_ded += 1; 290 else 291 cpu_inf->cpu_num_shd += 1; 292 } 293 294 if (this_lpar && capped) { 295 part_inf->cp.lpar_weight = weight_cp; 296 part_inf->ifl.lpar_weight = weight_ifl; 297 } 298 part_inf->cp.all_weight += weight_cp; 299 part_inf->ifl.all_weight += weight_ifl; 300 return (struct diag204_x_part_block *)&block->cpus[i]; 301 } 302 303 static void *diag204_get_data(bool diag204_allow_busy) 304 { 305 unsigned long subcode; 306 void *diag204_buf; 307 int pages, rc; 308 309 subcode = DIAG204_SUBC_RSI; 310 subcode |= DIAG204_INFO_EXT; 311 pages = diag204(subcode, 0, NULL); 312 if (pages < 0) 313 return ERR_PTR(pages); 314 if (pages == 0) 315 return ERR_PTR(-ENODATA); 316 diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), 317 PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, 318 __builtin_return_address(0)); 319 if (!diag204_buf) 320 return ERR_PTR(-ENOMEM); 321 subcode = DIAG204_SUBC_STIB7; 322 subcode |= DIAG204_INFO_EXT; 323 if (diag204_has_bif() && diag204_allow_busy) 324 subcode |= DIAG204_BIF_BIT; 325 rc = diag204(subcode, pages, diag204_buf); 326 if (rc < 0) { 327 vfree(diag204_buf); 328 return ERR_PTR(rc); 329 } 330 return diag204_buf; 331 } 332 333 static bool is_diag204_cached(struct sthyi_sctns *sctns) 334 { 335 /* 336 * Check if validity bits are set when diag204 data 337 * is gathered. 338 */ 339 if (sctns->par.infpval1) 340 return true; 341 return false; 342 } 343 344 static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf) 345 { 346 int i; 347 bool this_lpar; 348 void *diag224_buf = NULL; 349 struct diag204_x_info_blk_hdr *ti_hdr; 350 struct diag204_x_part_block *part_block; 351 struct diag204_x_phys_block *phys_block; 352 struct lpar_cpu_inf lpar_inf = {}; 353 354 diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); 355 if (!diag224_buf || diag224(diag224_buf)) 356 goto out; 357 358 ti_hdr = diag204_buf; 359 part_block = diag204_buf + sizeof(*ti_hdr); 360 361 for (i = 0; i < ti_hdr->npar; i++) { 362 /* 363 * For the calling lpar we also need to get the cpu 364 * caps and weights. The time information block header 365 * specifies the offset to the partition block of the 366 * caller lpar, so we know when we process its data. 367 */ 368 this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; 369 part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, 370 part_block); 371 } 372 373 phys_block = (struct diag204_x_phys_block *)part_block; 374 part_block = diag204_buf + ti_hdr->this_part; 375 if (part_block->hdr.mtid) 376 sctns->par.infpflg1 = PAR_MT_EN; 377 378 sctns->par.infpval1 |= PAR_GRP_VLD; 379 sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); 380 sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); 381 memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, 382 sizeof(sctns->par.infplgnm)); 383 384 sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; 385 sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; 386 sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; 387 sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; 388 sctns->par.infpval1 |= PAR_PCNT_VLD; 389 390 sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); 391 sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); 392 sctns->par.infpval1 |= PAR_ABS_VLD; 393 394 /* 395 * Everything below needs global performance data to be 396 * meaningful. 397 */ 398 if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { 399 sctns->hdr.infhflg1 |= HDR_PERF_UNAV; 400 goto out; 401 } 402 403 fill_diag_mac(sctns, phys_block, diag224_buf); 404 405 if (lpar_inf.cp.lpar_weight) { 406 sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * 407 lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; 408 } 409 410 if (lpar_inf.ifl.lpar_weight) { 411 sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * 412 lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; 413 } 414 sctns->par.infpval1 |= PAR_WGHT_VLD; 415 416 out: 417 free_page((unsigned long)diag224_buf); 418 } 419 420 static int sthyi(u64 vaddr, u64 *rc) 421 { 422 union register_pair r1 = { .even = 0, }; /* subcode */ 423 union register_pair r2 = { .even = vaddr, }; 424 int cc; 425 426 asm volatile( 427 ".insn rre,0xB2560000,%[r1],%[r2]\n" 428 "ipm %[cc]\n" 429 "srl %[cc],28\n" 430 : [cc] "=&d" (cc), [r2] "+&d" (r2.pair) 431 : [r1] "d" (r1.pair) 432 : "memory", "cc"); 433 *rc = r2.odd; 434 return cc; 435 } 436 437 static int fill_dst(void *dst, u64 *rc) 438 { 439 void *diag204_buf; 440 441 struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; 442 443 /* 444 * If the facility is on, we don't want to emulate the instruction. 445 * We ask the hypervisor to provide the data. 446 */ 447 if (test_facility(74)) { 448 memset(dst, 0, PAGE_SIZE); 449 return sthyi((u64)dst, rc); 450 } 451 /* 452 * When emulating, if diag204 returns BUSY don't reset dst buffer 453 * and use cached data. 454 */ 455 *rc = 0; 456 diag204_buf = diag204_get_data(is_diag204_cached(sctns)); 457 if (IS_ERR(diag204_buf)) 458 return PTR_ERR(diag204_buf); 459 memset(dst, 0, PAGE_SIZE); 460 fill_hdr(sctns); 461 fill_stsi(sctns); 462 fill_diag(sctns, diag204_buf); 463 vfree(diag204_buf); 464 return 0; 465 } 466 467 static int sthyi_init_cache(void) 468 { 469 if (sthyi_cache.info) 470 return 0; 471 sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); 472 if (!sthyi_cache.info) 473 return -ENOMEM; 474 sthyi_cache.end = jiffies - 1; /* expired */ 475 return 0; 476 } 477 478 static int sthyi_update_cache(u64 *rc) 479 { 480 int r; 481 482 r = fill_dst(sthyi_cache.info, rc); 483 if (r == 0) { 484 sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; 485 } else if (r == -EBUSY) { 486 /* mark as expired and return 0 to keep using cached data */ 487 sthyi_cache.end = jiffies - 1; 488 r = 0; 489 } 490 return r; 491 } 492 493 /* 494 * sthyi_fill - Fill page with data returned by the STHYI instruction 495 * 496 * @dst: Pointer to zeroed page 497 * @rc: Pointer for storing the return code of the instruction 498 * 499 * Fills the destination with system information returned by the STHYI 500 * instruction. The data is generated by emulation or execution of STHYI, 501 * if available. The return value is either a negative error value or 502 * the condition code that would be returned, the rc parameter is the 503 * return code which is passed in register R2 + 1. 504 */ 505 int sthyi_fill(void *dst, u64 *rc) 506 { 507 int r; 508 509 mutex_lock(&sthyi_mutex); 510 r = sthyi_init_cache(); 511 if (r) 512 goto out; 513 514 if (time_is_before_jiffies(sthyi_cache.end)) { 515 /* cache expired */ 516 r = sthyi_update_cache(rc); 517 if (r) 518 goto out; 519 } 520 *rc = 0; 521 memcpy(dst, sthyi_cache.info, PAGE_SIZE); 522 out: 523 mutex_unlock(&sthyi_mutex); 524 return r; 525 } 526 EXPORT_SYMBOL_GPL(sthyi_fill); 527 528 SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, 529 u64 __user *, return_code, unsigned long, flags) 530 { 531 u64 sthyi_rc; 532 void *info; 533 int r; 534 535 if (flags) 536 return -EINVAL; 537 if (function_code != STHYI_FC_CP_IFL_CAP) 538 return -EOPNOTSUPP; 539 info = (void *)get_zeroed_page(GFP_KERNEL); 540 if (!info) 541 return -ENOMEM; 542 r = sthyi_fill(info, &sthyi_rc); 543 if (r < 0) 544 goto out; 545 if (return_code && put_user(sthyi_rc, return_code)) { 546 r = -EFAULT; 547 goto out; 548 } 549 if (copy_to_user(buffer, info, PAGE_SIZE)) 550 r = -EFAULT; 551 out: 552 free_page((unsigned long)info); 553 return r; 554 } 555