1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * store hypervisor information instruction emulation functions. 4 * 5 * Copyright IBM Corp. 2016 6 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> 7 */ 8 #include <linux/errno.h> 9 #include <linux/pagemap.h> 10 #include <linux/vmalloc.h> 11 #include <linux/syscalls.h> 12 #include <linux/mutex.h> 13 #include <asm/asm-offsets.h> 14 #include <asm/sclp.h> 15 #include <asm/diag.h> 16 #include <asm/sysinfo.h> 17 #include <asm/ebcdic.h> 18 #include <asm/facility.h> 19 #include <asm/sthyi.h> 20 #include <asm/asm.h> 21 #include "entry.h" 22 23 #define DED_WEIGHT 0xffff 24 /* 25 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string 26 * as they are justified with spaces. 27 */ 28 #define CP 0xc3d7404040404040UL 29 #define IFL 0xc9c6d34040404040UL 30 31 enum hdr_flags { 32 HDR_NOT_LPAR = 0x10, 33 HDR_STACK_INCM = 0x20, 34 HDR_STSI_UNAV = 0x40, 35 HDR_PERF_UNAV = 0x80, 36 }; 37 38 enum mac_validity { 39 MAC_NAME_VLD = 0x20, 40 MAC_ID_VLD = 0x40, 41 MAC_CNT_VLD = 0x80, 42 }; 43 44 enum par_flag { 45 PAR_MT_EN = 0x80, 46 }; 47 48 enum par_validity { 49 PAR_GRP_VLD = 0x08, 50 PAR_ID_VLD = 0x10, 51 PAR_ABS_VLD = 0x20, 52 PAR_WGHT_VLD = 0x40, 53 PAR_PCNT_VLD = 0x80, 54 }; 55 56 struct hdr_sctn { 57 u8 infhflg1; 58 u8 infhflg2; /* reserved */ 59 u8 infhval1; /* reserved */ 60 u8 infhval2; /* reserved */ 61 u8 reserved[3]; 62 u8 infhygct; 63 u16 infhtotl; 64 u16 infhdln; 65 u16 infmoff; 66 u16 infmlen; 67 u16 infpoff; 68 u16 infplen; 69 u16 infhoff1; 70 u16 infhlen1; 71 u16 infgoff1; 72 u16 infglen1; 73 u16 infhoff2; 74 u16 infhlen2; 75 u16 infgoff2; 76 u16 infglen2; 77 u16 infhoff3; 78 u16 infhlen3; 79 u16 infgoff3; 80 u16 infglen3; 81 u8 reserved2[4]; 82 } __packed; 83 84 struct mac_sctn { 85 u8 infmflg1; /* reserved */ 86 u8 infmflg2; /* reserved */ 87 u8 infmval1; 88 u8 infmval2; /* reserved */ 89 u16 infmscps; 90 u16 infmdcps; 91 u16 infmsifl; 92 u16 infmdifl; 93 char infmname[8]; 94 char infmtype[4]; 95 char infmmanu[16]; 96 char infmseq[16]; 97 char infmpman[4]; 98 u8 reserved[4]; 99 } __packed; 100 101 struct par_sctn { 102 u8 infpflg1; 103 u8 infpflg2; /* reserved */ 104 u8 infpval1; 105 u8 infpval2; /* reserved */ 106 u16 infppnum; 107 u16 infpscps; 108 u16 infpdcps; 109 u16 infpsifl; 110 u16 infpdifl; 111 u16 reserved; 112 char infppnam[8]; 113 u32 infpwbcp; 114 u32 infpabcp; 115 u32 infpwbif; 116 u32 infpabif; 117 char infplgnm[8]; 118 u32 infplgcp; 119 u32 infplgif; 120 } __packed; 121 122 struct sthyi_sctns { 123 struct hdr_sctn hdr; 124 struct mac_sctn mac; 125 struct par_sctn par; 126 } __packed; 127 128 struct cpu_inf { 129 u64 lpar_cap; 130 u64 lpar_grp_cap; 131 u64 lpar_weight; 132 u64 all_weight; 133 int cpu_num_ded; 134 int cpu_num_shd; 135 }; 136 137 struct lpar_cpu_inf { 138 struct cpu_inf cp; 139 struct cpu_inf ifl; 140 }; 141 142 /* 143 * STHYI requires extensive locking in the higher hypervisors 144 * and is very computational/memory expensive. Therefore we 145 * cache the retrieved data whose valid period is 1s. 146 */ 147 #define CACHE_VALID_JIFFIES HZ 148 149 struct sthyi_info { 150 void *info; 151 unsigned long end; 152 }; 153 154 static DEFINE_MUTEX(sthyi_mutex); 155 static struct sthyi_info sthyi_cache; 156 157 static inline u64 cpu_id(u8 ctidx, void *diag224_buf) 158 { 159 return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); 160 } 161 162 /* 163 * Scales the cpu capping from the lpar range to the one expected in 164 * sthyi data. 165 * 166 * diag204 reports a cap in hundredths of processor units. 167 * z/VM's range for one core is 0 - 0x10000. 168 */ 169 static u32 scale_cap(u32 in) 170 { 171 return (0x10000 * in) / 100; 172 } 173 174 static void fill_hdr(struct sthyi_sctns *sctns) 175 { 176 sctns->hdr.infhdln = sizeof(sctns->hdr); 177 sctns->hdr.infmoff = sizeof(sctns->hdr); 178 sctns->hdr.infmlen = sizeof(sctns->mac); 179 sctns->hdr.infplen = sizeof(sctns->par); 180 sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; 181 sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; 182 } 183 184 static void fill_stsi_mac(struct sthyi_sctns *sctns, 185 struct sysinfo_1_1_1 *sysinfo) 186 { 187 sclp_ocf_cpc_name_copy(sctns->mac.infmname); 188 if (*(u64 *)sctns->mac.infmname != 0) 189 sctns->mac.infmval1 |= MAC_NAME_VLD; 190 191 if (stsi(sysinfo, 1, 1, 1)) 192 return; 193 194 memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); 195 memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); 196 memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); 197 memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); 198 199 sctns->mac.infmval1 |= MAC_ID_VLD; 200 } 201 202 static void fill_stsi_par(struct sthyi_sctns *sctns, 203 struct sysinfo_2_2_2 *sysinfo) 204 { 205 if (stsi(sysinfo, 2, 2, 2)) 206 return; 207 208 sctns->par.infppnum = sysinfo->lpar_number; 209 memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); 210 211 sctns->par.infpval1 |= PAR_ID_VLD; 212 } 213 214 static void fill_stsi(struct sthyi_sctns *sctns) 215 { 216 void *sysinfo; 217 218 /* Errors are handled through the validity bits in the response. */ 219 sysinfo = (void *)__get_free_page(GFP_KERNEL); 220 if (!sysinfo) 221 return; 222 223 fill_stsi_mac(sctns, sysinfo); 224 fill_stsi_par(sctns, sysinfo); 225 226 free_pages((unsigned long)sysinfo, 0); 227 } 228 229 static void fill_diag_mac(struct sthyi_sctns *sctns, 230 struct diag204_x_phys_block *block, 231 void *diag224_buf) 232 { 233 int i; 234 235 for (i = 0; i < block->hdr.cpus; i++) { 236 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 237 case CP: 238 if (block->cpus[i].weight == DED_WEIGHT) 239 sctns->mac.infmdcps++; 240 else 241 sctns->mac.infmscps++; 242 break; 243 case IFL: 244 if (block->cpus[i].weight == DED_WEIGHT) 245 sctns->mac.infmdifl++; 246 else 247 sctns->mac.infmsifl++; 248 break; 249 } 250 } 251 sctns->mac.infmval1 |= MAC_CNT_VLD; 252 } 253 254 /* Returns a pointer to the the next partition block. */ 255 static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, 256 bool this_lpar, 257 void *diag224_buf, 258 struct diag204_x_part_block *block) 259 { 260 int i, capped = 0, weight_cp = 0, weight_ifl = 0; 261 struct cpu_inf *cpu_inf; 262 263 for (i = 0; i < block->hdr.rcpus; i++) { 264 if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) 265 continue; 266 267 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 268 case CP: 269 cpu_inf = &part_inf->cp; 270 if (block->cpus[i].cur_weight < DED_WEIGHT) 271 weight_cp |= block->cpus[i].cur_weight; 272 break; 273 case IFL: 274 cpu_inf = &part_inf->ifl; 275 if (block->cpus[i].cur_weight < DED_WEIGHT) 276 weight_ifl |= block->cpus[i].cur_weight; 277 break; 278 default: 279 continue; 280 } 281 282 if (!this_lpar) 283 continue; 284 285 capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; 286 cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; 287 cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; 288 289 if (block->cpus[i].weight == DED_WEIGHT) 290 cpu_inf->cpu_num_ded += 1; 291 else 292 cpu_inf->cpu_num_shd += 1; 293 } 294 295 if (this_lpar && capped) { 296 part_inf->cp.lpar_weight = weight_cp; 297 part_inf->ifl.lpar_weight = weight_ifl; 298 } 299 part_inf->cp.all_weight += weight_cp; 300 part_inf->ifl.all_weight += weight_ifl; 301 return (struct diag204_x_part_block *)&block->cpus[i]; 302 } 303 304 static void *diag204_get_data(bool diag204_allow_busy) 305 { 306 unsigned long subcode; 307 void *diag204_buf; 308 int pages, rc; 309 310 subcode = DIAG204_SUBC_RSI; 311 subcode |= DIAG204_INFO_EXT; 312 pages = diag204(subcode, 0, NULL); 313 if (pages < 0) 314 return ERR_PTR(pages); 315 if (pages == 0) 316 return ERR_PTR(-ENODATA); 317 diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), 318 PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, 319 __builtin_return_address(0)); 320 if (!diag204_buf) 321 return ERR_PTR(-ENOMEM); 322 subcode = DIAG204_SUBC_STIB7; 323 subcode |= DIAG204_INFO_EXT; 324 if (diag204_has_bif() && diag204_allow_busy) 325 subcode |= DIAG204_BIF_BIT; 326 rc = diag204(subcode, pages, diag204_buf); 327 if (rc < 0) { 328 vfree(diag204_buf); 329 return ERR_PTR(rc); 330 } 331 return diag204_buf; 332 } 333 334 static bool is_diag204_cached(struct sthyi_sctns *sctns) 335 { 336 /* 337 * Check if validity bits are set when diag204 data 338 * is gathered. 339 */ 340 if (sctns->par.infpval1) 341 return true; 342 return false; 343 } 344 345 static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf) 346 { 347 int i; 348 bool this_lpar; 349 void *diag224_buf = NULL; 350 struct diag204_x_info_blk_hdr *ti_hdr; 351 struct diag204_x_part_block *part_block; 352 struct diag204_x_phys_block *phys_block; 353 struct lpar_cpu_inf lpar_inf = {}; 354 355 diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); 356 if (!diag224_buf || diag224(diag224_buf)) 357 goto out; 358 359 ti_hdr = diag204_buf; 360 part_block = diag204_buf + sizeof(*ti_hdr); 361 362 for (i = 0; i < ti_hdr->npar; i++) { 363 /* 364 * For the calling lpar we also need to get the cpu 365 * caps and weights. The time information block header 366 * specifies the offset to the partition block of the 367 * caller lpar, so we know when we process its data. 368 */ 369 this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; 370 part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, 371 part_block); 372 } 373 374 phys_block = (struct diag204_x_phys_block *)part_block; 375 part_block = diag204_buf + ti_hdr->this_part; 376 if (part_block->hdr.mtid) 377 sctns->par.infpflg1 = PAR_MT_EN; 378 379 sctns->par.infpval1 |= PAR_GRP_VLD; 380 sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); 381 sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); 382 memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, 383 sizeof(sctns->par.infplgnm)); 384 385 sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; 386 sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; 387 sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; 388 sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; 389 sctns->par.infpval1 |= PAR_PCNT_VLD; 390 391 sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); 392 sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); 393 sctns->par.infpval1 |= PAR_ABS_VLD; 394 395 /* 396 * Everything below needs global performance data to be 397 * meaningful. 398 */ 399 if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { 400 sctns->hdr.infhflg1 |= HDR_PERF_UNAV; 401 goto out; 402 } 403 404 fill_diag_mac(sctns, phys_block, diag224_buf); 405 406 if (lpar_inf.cp.lpar_weight) { 407 sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * 408 lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; 409 } 410 411 if (lpar_inf.ifl.lpar_weight) { 412 sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * 413 lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; 414 } 415 sctns->par.infpval1 |= PAR_WGHT_VLD; 416 417 out: 418 free_page((unsigned long)diag224_buf); 419 } 420 421 static int sthyi(u64 vaddr, u64 *rc) 422 { 423 union register_pair r1 = { .even = 0, }; /* subcode */ 424 union register_pair r2 = { .even = vaddr, }; 425 int cc; 426 427 asm volatile( 428 ".insn rre,0xB2560000,%[r1],%[r2]\n" 429 CC_IPM(cc) 430 : CC_OUT(cc, cc), [r2] "+&d" (r2.pair) 431 : [r1] "d" (r1.pair) 432 : CC_CLOBBER_LIST("memory")); 433 *rc = r2.odd; 434 return CC_TRANSFORM(cc); 435 } 436 437 static int fill_dst(void *dst, u64 *rc) 438 { 439 void *diag204_buf; 440 441 struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; 442 443 /* 444 * If the facility is on, we don't want to emulate the instruction. 445 * We ask the hypervisor to provide the data. 446 */ 447 if (test_facility(74)) { 448 memset(dst, 0, PAGE_SIZE); 449 return sthyi((u64)dst, rc); 450 } 451 /* 452 * When emulating, if diag204 returns BUSY don't reset dst buffer 453 * and use cached data. 454 */ 455 *rc = 0; 456 diag204_buf = diag204_get_data(is_diag204_cached(sctns)); 457 if (IS_ERR(diag204_buf)) 458 return PTR_ERR(diag204_buf); 459 memset(dst, 0, PAGE_SIZE); 460 fill_hdr(sctns); 461 fill_stsi(sctns); 462 fill_diag(sctns, diag204_buf); 463 vfree(diag204_buf); 464 return 0; 465 } 466 467 static int sthyi_init_cache(void) 468 { 469 if (sthyi_cache.info) 470 return 0; 471 sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); 472 if (!sthyi_cache.info) 473 return -ENOMEM; 474 sthyi_cache.end = jiffies - 1; /* expired */ 475 return 0; 476 } 477 478 static int sthyi_update_cache(u64 *rc) 479 { 480 int r; 481 482 r = fill_dst(sthyi_cache.info, rc); 483 if (r == 0) { 484 sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; 485 } else if (r == -EBUSY) { 486 /* mark as expired and return 0 to keep using cached data */ 487 sthyi_cache.end = jiffies - 1; 488 r = 0; 489 } 490 return r; 491 } 492 493 /* 494 * sthyi_fill - Fill page with data returned by the STHYI instruction 495 * 496 * @dst: Pointer to zeroed page 497 * @rc: Pointer for storing the return code of the instruction 498 * 499 * Fills the destination with system information returned by the STHYI 500 * instruction. The data is generated by emulation or execution of STHYI, 501 * if available. The return value is either a negative error value or 502 * the condition code that would be returned, the rc parameter is the 503 * return code which is passed in register R2 + 1. 504 */ 505 int sthyi_fill(void *dst, u64 *rc) 506 { 507 int r; 508 509 mutex_lock(&sthyi_mutex); 510 r = sthyi_init_cache(); 511 if (r) 512 goto out; 513 514 if (time_is_before_jiffies(sthyi_cache.end)) { 515 /* cache expired */ 516 r = sthyi_update_cache(rc); 517 if (r) 518 goto out; 519 } 520 *rc = 0; 521 memcpy(dst, sthyi_cache.info, PAGE_SIZE); 522 out: 523 mutex_unlock(&sthyi_mutex); 524 return r; 525 } 526 EXPORT_SYMBOL_GPL(sthyi_fill); 527 528 SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, 529 u64 __user *, return_code, unsigned long, flags) 530 { 531 u64 sthyi_rc; 532 void *info; 533 int r; 534 535 if (flags) 536 return -EINVAL; 537 if (function_code != STHYI_FC_CP_IFL_CAP) 538 return -EOPNOTSUPP; 539 info = (void *)get_zeroed_page(GFP_KERNEL); 540 if (!info) 541 return -ENOMEM; 542 r = sthyi_fill(info, &sthyi_rc); 543 if (r < 0) 544 goto out; 545 if (return_code && put_user(sthyi_rc, return_code)) { 546 r = -EFAULT; 547 goto out; 548 } 549 if (copy_to_user(buffer, info, PAGE_SIZE)) 550 r = -EFAULT; 551 out: 552 free_page((unsigned long)info); 553 return r; 554 } 555