// SPDX-License-Identifier: GPL-2.0
/*
 * store hypervisor information instruction emulation functions.
 *
 * Copyright IBM Corp. 2016
 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
 */

#include <linux/export.h>
#include <linux/errno.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/syscalls.h>
#include <linux/mutex.h>
#include <asm/asm-offsets.h>
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/sysinfo.h>
#include <asm/ebcdic.h>
#include <asm/facility.h>
#include <asm/sthyi.h>
#include <asm/asm.h>
#include "entry.h"

#define DED_WEIGHT 0xffff
/*
 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
 * as they are justified with spaces.
 */
#define CP  0xc3d7404040404040UL
#define IFL 0xc9c6d34040404040UL

enum hdr_flags {
	HDR_NOT_LPAR	= 0x10,
	HDR_STACK_INCM	= 0x20,
	HDR_STSI_UNAV	= 0x40,
	HDR_PERF_UNAV	= 0x80,
};

enum mac_validity {
	MAC_NAME_VLD	= 0x20,
	MAC_ID_VLD	= 0x40,
	MAC_CNT_VLD	= 0x80,
};

enum par_flag {
	PAR_MT_EN	= 0x80,
};

enum par_validity {
	PAR_GRP_VLD	= 0x08,
	PAR_ID_VLD	= 0x10,
	PAR_ABS_VLD	= 0x20,
	PAR_WGHT_VLD	= 0x40,
	PAR_PCNT_VLD	= 0x80,
};

struct hdr_sctn {
	u8 infhflg1;
	u8 infhflg2; /* reserved */
	u8 infhval1; /* reserved */
	u8 infhval2; /* reserved */
	u8 reserved[3];
	u8 infhygct;
	u16 infhtotl;
	u16 infhdln;
	u16 infmoff;
	u16 infmlen;
	u16 infpoff;
	u16 infplen;
	u16 infhoff1;
	u16 infhlen1;
	u16 infgoff1;
	u16 infglen1;
	u16 infhoff2;
	u16 infhlen2;
	u16 infgoff2;
	u16 infglen2;
	u16 infhoff3;
	u16 infhlen3;
	u16 infgoff3;
	u16 infglen3;
	u8 reserved2[4];
} __packed;

struct mac_sctn {
	u8 infmflg1; /* reserved */
	u8 infmflg2; /* reserved */
	u8 infmval1;
	u8 infmval2; /* reserved */
	u16 infmscps;
	u16 infmdcps;
	u16 infmsifl;
	u16 infmdifl;
	char infmname[8];
	char infmtype[4];
	char infmmanu[16];
	char infmseq[16];
	char infmpman[4];
	u8 reserved[4];
} __packed;

struct par_sctn {
	u8 infpflg1;
	u8 infpflg2; /* reserved */
	u8 infpval1;
	u8 infpval2; /* reserved */
	u16 infppnum;
	u16 infpscps;
	u16 infpdcps;
	u16 infpsifl;
	u16 infpdifl;
	u16 reserved;
	char infppnam[8];
	u32 infpwbcp;
	u32 infpabcp;
	u32 infpwbif;
	u32 infpabif;
	char infplgnm[8];
	u32 infplgcp;
	u32 infplgif;
} __packed;

struct sthyi_sctns {
	struct hdr_sctn hdr;
	struct mac_sctn mac;
	struct par_sctn par;
} __packed;

struct cpu_inf {
	u64 lpar_cap;
	u64 lpar_grp_cap;
	u64 lpar_weight;
	u64 all_weight;
	int cpu_num_ded;
	int cpu_num_shd;
};

struct lpar_cpu_inf {
	struct cpu_inf cp;
	struct cpu_inf ifl;
};

/*
 * STHYI requires extensive locking in the higher hypervisors and is
 * computationally and memory intensive. Therefore we cache the
 * retrieved data; a cached copy is valid for one second.
 */
#define CACHE_VALID_JIFFIES	HZ

struct sthyi_info {
	void *info;
	unsigned long end;
};

static DEFINE_MUTEX(sthyi_mutex);
static struct sthyi_info sthyi_cache;

static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
{
	return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
}
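/*
 * Illustration (editor's note, not from the original source): the CP
 * and IFL constants above are the first eight EBCDIC bytes of the
 * space-padded type names that cpu_id() reads from the diag224 name
 * table, each entry being DIAG204_CPU_NAME_LEN bytes long:
 *
 *   "CP      " -> 0xc3 0xd7 0x40 0x40 0x40 0x40 0x40 0x40 -> 0xc3d7404040404040
 *   "IFL     " -> 0xc9 0xc6 0xd3 0x40 0x40 0x40 0x40 0x40 -> 0xc9c6d34040404040
 *
 * cpu_id() skips the first table entry (presumably the general CPU
 * type), so the name for configured type index ctidx lives at offset
 * (ctidx + 1) * DIAG204_CPU_NAME_LEN.
 */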
/*
 * Scales the cpu capping from the lpar range to the one expected in
 * sthyi data.
 *
 * diag204 reports a cap in hundredths of processor units.
 * z/VM's range for one core is 0 - 0x10000.
 * For example, a diag204 cap of 150 (1.5 cores) scales to 0x18000.
 */
static u32 scale_cap(u32 in)
{
	return (0x10000 * in) / 100;
}

static void fill_hdr(struct sthyi_sctns *sctns)
{
	sctns->hdr.infhdln = sizeof(sctns->hdr);
	sctns->hdr.infmoff = sizeof(sctns->hdr);
	sctns->hdr.infmlen = sizeof(sctns->mac);
	sctns->hdr.infplen = sizeof(sctns->par);
	sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
	sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
}

static void fill_stsi_mac(struct sthyi_sctns *sctns,
			  struct sysinfo_1_1_1 *sysinfo)
{
	sclp_ocf_cpc_name_copy(sctns->mac.infmname);
	if (*(u64 *)sctns->mac.infmname != 0)
		sctns->mac.infmval1 |= MAC_NAME_VLD;

	if (stsi(sysinfo, 1, 1, 1))
		return;

	memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
	memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
	memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
	memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));

	sctns->mac.infmval1 |= MAC_ID_VLD;
}

static void fill_stsi_par(struct sthyi_sctns *sctns,
			  struct sysinfo_2_2_2 *sysinfo)
{
	if (stsi(sysinfo, 2, 2, 2))
		return;

	sctns->par.infppnum = sysinfo->lpar_number;
	memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));

	sctns->par.infpval1 |= PAR_ID_VLD;
}

static void fill_stsi(struct sthyi_sctns *sctns)
{
	void *sysinfo;

	/* Errors are handled through the validity bits in the response. */
	sysinfo = (void *)__get_free_page(GFP_KERNEL);
	if (!sysinfo)
		return;

	fill_stsi_mac(sctns, sysinfo);
	fill_stsi_par(sctns, sysinfo);

	free_pages((unsigned long)sysinfo, 0);
}

static void fill_diag_mac(struct sthyi_sctns *sctns,
			  struct diag204_x_phys_block *block,
			  void *diag224_buf)
{
	int i;

	for (i = 0; i < block->hdr.cpus; i++) {
		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
		case CP:
			if (block->cpus[i].weight == DED_WEIGHT)
				sctns->mac.infmdcps++;
			else
				sctns->mac.infmscps++;
			break;
		case IFL:
			if (block->cpus[i].weight == DED_WEIGHT)
				sctns->mac.infmdifl++;
			else
				sctns->mac.infmsifl++;
			break;
		}
	}
	sctns->mac.infmval1 |= MAC_CNT_VLD;
}
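/*
 * Sketch of the diag204 extended (DIAG204_INFO_EXT) buffer that the
 * code below walks (editor's orientation aid, not a format definition):
 *
 *	struct diag204_x_info_blk_hdr	time information block header
 *	struct diag204_x_part_block	partition 0 (header + rcpus entries)
 *	...
 *	struct diag204_x_part_block	partition npar - 1
 *	struct diag204_x_phys_block	physical block, consumed only when
 *					DIAG204_LPAR_PHYS_FLG is set
 *
 * The per-partition cpu arrays are variable length, which is why
 * lpar_cpu_inf() returns &block->cpus[i] - the first byte past the
 * block it just processed - as the start of the next partition block.
 */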
/* Returns a pointer to the next partition block. */
static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
						 bool this_lpar,
						 void *diag224_buf,
						 struct diag204_x_part_block *block)
{
	int i, capped = 0, weight_cp = 0, weight_ifl = 0;
	struct cpu_inf *cpu_inf;

	for (i = 0; i < block->hdr.rcpus; i++) {
		if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
			continue;

		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
		case CP:
			cpu_inf = &part_inf->cp;
			if (block->cpus[i].cur_weight < DED_WEIGHT)
				weight_cp |= block->cpus[i].cur_weight;
			break;
		case IFL:
			cpu_inf = &part_inf->ifl;
			if (block->cpus[i].cur_weight < DED_WEIGHT)
				weight_ifl |= block->cpus[i].cur_weight;
			break;
		default:
			continue;
		}

		if (!this_lpar)
			continue;

		capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
		cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
		cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;

		if (block->cpus[i].weight == DED_WEIGHT)
			cpu_inf->cpu_num_ded += 1;
		else
			cpu_inf->cpu_num_shd += 1;
	}

	if (this_lpar && capped) {
		part_inf->cp.lpar_weight = weight_cp;
		part_inf->ifl.lpar_weight = weight_ifl;
	}
	part_inf->cp.all_weight += weight_cp;
	part_inf->ifl.all_weight += weight_ifl;
	return (struct diag204_x_part_block *)&block->cpus[i];
}

static void *diag204_get_data(bool diag204_allow_busy)
{
	unsigned long subcode;
	void *diag204_buf;
	int pages, rc;

	subcode = DIAG204_SUBC_RSI;
	subcode |= DIAG204_INFO_EXT;
	pages = diag204(subcode, 0, NULL);
	if (pages < 0)
		return ERR_PTR(pages);
	if (pages == 0)
		return ERR_PTR(-ENODATA);
	diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
				     __builtin_return_address(0));
	if (!diag204_buf)
		return ERR_PTR(-ENOMEM);
	subcode = DIAG204_SUBC_STIB7;
	subcode |= DIAG204_INFO_EXT;
	if (diag204_has_bif() && diag204_allow_busy)
		subcode |= DIAG204_BIF_BIT;
	rc = diag204(subcode, pages, diag204_buf);
	if (rc < 0) {
		vfree(diag204_buf);
		return ERR_PTR(rc);
	}
	return diag204_buf;
}

static bool is_diag204_cached(struct sthyi_sctns *sctns)
{
	/*
	 * The partition validity bits are only set once diag204 data
	 * has been gathered, so they indicate whether the buffer
	 * already holds cached diag204 results.
	 */
	if (sctns->par.infpval1)
		return true;
	return false;
}
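/*
 * Worked example for the weight-based capacity that fill_diag() below
 * computes (editor's note with illustrative numbers, not from the
 * original source): with 20 shared physical CPs (infmscps), an
 * lpar_weight of 200 and an all_weight of 1000,
 *
 *	infpwbcp = 20 * 0x10000 * 200 / 1000 = 4 * 0x10000 = 0x40000
 *
 * i.e. an entitlement of four CPs in the same 0x10000-per-core
 * fixed-point format that scale_cap() produces.
 */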
static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf)
{
	int i;
	bool this_lpar;
	void *diag224_buf = NULL;
	struct diag204_x_info_blk_hdr *ti_hdr;
	struct diag204_x_part_block *part_block;
	struct diag204_x_phys_block *phys_block;
	struct lpar_cpu_inf lpar_inf = {};

	diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
	if (!diag224_buf || diag224(diag224_buf))
		goto out;

	ti_hdr = diag204_buf;
	part_block = diag204_buf + sizeof(*ti_hdr);

	for (i = 0; i < ti_hdr->npar; i++) {
		/*
		 * For the calling lpar we also need to get the cpu
		 * caps and weights. The time information block header
		 * specifies the offset to the partition block of the
		 * calling lpar, so we know when we process its data.
		 */
		this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
		part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
					  part_block);
	}

	phys_block = (struct diag204_x_phys_block *)part_block;
	part_block = diag204_buf + ti_hdr->this_part;
	if (part_block->hdr.mtid)
		sctns->par.infpflg1 = PAR_MT_EN;

	sctns->par.infpval1 |= PAR_GRP_VLD;
	sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
	sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
	memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
	       sizeof(sctns->par.infplgnm));

	sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
	sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
	sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
	sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
	sctns->par.infpval1 |= PAR_PCNT_VLD;

	sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
	sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
	sctns->par.infpval1 |= PAR_ABS_VLD;

	/*
	 * Everything below needs global performance data to be
	 * meaningful.
	 */
	if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
		sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
		goto out;
	}

	fill_diag_mac(sctns, phys_block, diag224_buf);

	if (lpar_inf.cp.lpar_weight) {
		sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
			lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
	}

	if (lpar_inf.ifl.lpar_weight) {
		sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
			lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
	}
	sctns->par.infpval1 |= PAR_WGHT_VLD;

out:
	free_page((unsigned long)diag224_buf);
}

static int sthyi(u64 vaddr, u64 *rc)
{
	union register_pair r1 = { .even = 0, }; /* subcode */
	union register_pair r2 = { .even = vaddr, };
	int cc;

	asm volatile(
		".insn rre,0xB2560000,%[r1],%[r2]\n"
		CC_IPM(cc)
		: CC_OUT(cc, cc), [r2] "+&d" (r2.pair)
		: [r1] "d" (r1.pair)
		: CC_CLOBBER_LIST("memory"));
	*rc = r2.odd;
	return CC_TRANSFORM(cc);
}
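/*
 * How the BUSY handling below ties together (editor's summary of this
 * code, not of the architecture): fill_dst() only allows diag204 to
 * report BUSY (by setting DIAG204_BIF_BIT) when is_diag204_cached()
 * says the buffer already holds data to fall back on, and
 * sthyi_update_cache() turns the resulting -EBUSY into success while
 * leaving the cache marked expired, so the stale data keeps being
 * served until a later diag204 call succeeds again.
 */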
static int fill_dst(void *dst, u64 *rc)
{
	void *diag204_buf;

	struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;

	/*
	 * If the facility is on, we don't want to emulate the instruction.
	 * We ask the hypervisor to provide the data.
	 */
	if (test_facility(74)) {
		memset(dst, 0, PAGE_SIZE);
		return sthyi((u64)dst, rc);
	}
	/*
	 * When emulating, if diag204 returns BUSY, don't reset the dst
	 * buffer and keep using the cached data.
	 */
	*rc = 0;
	diag204_buf = diag204_get_data(is_diag204_cached(sctns));
	if (IS_ERR(diag204_buf))
		return PTR_ERR(diag204_buf);
	memset(dst, 0, PAGE_SIZE);
	fill_hdr(sctns);
	fill_stsi(sctns);
	fill_diag(sctns, diag204_buf);
	vfree(diag204_buf);
	return 0;
}

static int sthyi_init_cache(void)
{
	if (sthyi_cache.info)
		return 0;
	sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL);
	if (!sthyi_cache.info)
		return -ENOMEM;
	sthyi_cache.end = jiffies - 1; /* expired */
	return 0;
}

static int sthyi_update_cache(u64 *rc)
{
	int r;

	r = fill_dst(sthyi_cache.info, rc);
	if (r == 0) {
		sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
	} else if (r == -EBUSY) {
		/* mark as expired and return 0 to keep using cached data */
		sthyi_cache.end = jiffies - 1;
		r = 0;
	}
	return r;
}

/*
 * sthyi_fill - Fill page with data returned by the STHYI instruction
 *
 * @dst: Pointer to zeroed page
 * @rc: Pointer for storing the return code of the instruction
 *
 * Fills the destination with system information returned by the STHYI
 * instruction. The data is generated by emulation or, if available, by
 * execution of STHYI. The return value is either a negative error value
 * or the condition code that the instruction would return; the rc
 * parameter receives the return code which is passed in register R2 + 1.
 */
int sthyi_fill(void *dst, u64 *rc)
{
	int r;

	mutex_lock(&sthyi_mutex);
	r = sthyi_init_cache();
	if (r)
		goto out;

	if (time_is_before_jiffies(sthyi_cache.end)) {
		/* cache expired */
		r = sthyi_update_cache(rc);
		if (r)
			goto out;
	}
	*rc = 0;
	memcpy(dst, sthyi_cache.info, PAGE_SIZE);
out:
	mutex_unlock(&sthyi_mutex);
	return r;
}
EXPORT_SYMBOL_GPL(sthyi_fill);

SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer,
		u64 __user *, return_code, unsigned long, flags)
{
	u64 sthyi_rc;
	void *info;
	int r;

	if (flags)
		return -EINVAL;
	if (function_code != STHYI_FC_CP_IFL_CAP)
		return -EOPNOTSUPP;
	info = (void *)get_zeroed_page(GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	r = sthyi_fill(info, &sthyi_rc);
	if (r < 0)
		goto out;
	if (return_code && put_user(sthyi_rc, return_code)) {
		r = -EFAULT;
		goto out;
	}
	if (copy_to_user(buffer, info, PAGE_SIZE))
		r = -EFAULT;
out:
	free_page((unsigned long)info);
	return r;
}
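/*
 * Hypothetical user-space sketch of the syscall above (editor's note;
 * the syscall number and STHYI_FC_CP_IFL_CAP come from the s390 uapi
 * headers, error handling trimmed). A zero syscall return is the
 * condition code; rc receives the instruction return code from R2 + 1:
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <asm/sthyi.h>		// STHYI_FC_CP_IFL_CAP
 *
 *	char buf[4096];			// one page, filled by the kernel
 *	unsigned long long rc;		// return code from R2 + 1
 *
 *	long cc = syscall(__NR_s390_sthyi, STHYI_FC_CP_IFL_CAP,
 *			  buf, &rc, 0UL);
 *	if (cc == 0 && rc == 0)
 *		// parse the header/machine/partition sections in buf
 */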