1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * store hypervisor information instruction emulation functions. 4 * 5 * Copyright IBM Corp. 2016 6 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> 7 */ 8 #include <linux/errno.h> 9 #include <linux/pagemap.h> 10 #include <linux/vmalloc.h> 11 #include <linux/syscalls.h> 12 #include <linux/mutex.h> 13 #include <asm/asm-offsets.h> 14 #include <asm/sclp.h> 15 #include <asm/diag.h> 16 #include <asm/sysinfo.h> 17 #include <asm/ebcdic.h> 18 #include <asm/facility.h> 19 #include <asm/sthyi.h> 20 #include "entry.h" 21 22 #define DED_WEIGHT 0xffff 23 /* 24 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string 25 * as they are justified with spaces. 26 */ 27 #define CP 0xc3d7404040404040UL 28 #define IFL 0xc9c6d34040404040UL 29 30 enum hdr_flags { 31 HDR_NOT_LPAR = 0x10, 32 HDR_STACK_INCM = 0x20, 33 HDR_STSI_UNAV = 0x40, 34 HDR_PERF_UNAV = 0x80, 35 }; 36 37 enum mac_validity { 38 MAC_NAME_VLD = 0x20, 39 MAC_ID_VLD = 0x40, 40 MAC_CNT_VLD = 0x80, 41 }; 42 43 enum par_flag { 44 PAR_MT_EN = 0x80, 45 }; 46 47 enum par_validity { 48 PAR_GRP_VLD = 0x08, 49 PAR_ID_VLD = 0x10, 50 PAR_ABS_VLD = 0x20, 51 PAR_WGHT_VLD = 0x40, 52 PAR_PCNT_VLD = 0x80, 53 }; 54 55 struct hdr_sctn { 56 u8 infhflg1; 57 u8 infhflg2; /* reserved */ 58 u8 infhval1; /* reserved */ 59 u8 infhval2; /* reserved */ 60 u8 reserved[3]; 61 u8 infhygct; 62 u16 infhtotl; 63 u16 infhdln; 64 u16 infmoff; 65 u16 infmlen; 66 u16 infpoff; 67 u16 infplen; 68 u16 infhoff1; 69 u16 infhlen1; 70 u16 infgoff1; 71 u16 infglen1; 72 u16 infhoff2; 73 u16 infhlen2; 74 u16 infgoff2; 75 u16 infglen2; 76 u16 infhoff3; 77 u16 infhlen3; 78 u16 infgoff3; 79 u16 infglen3; 80 u8 reserved2[4]; 81 } __packed; 82 83 struct mac_sctn { 84 u8 infmflg1; /* reserved */ 85 u8 infmflg2; /* reserved */ 86 u8 infmval1; 87 u8 infmval2; /* reserved */ 88 u16 infmscps; 89 u16 infmdcps; 90 u16 infmsifl; 91 u16 infmdifl; 92 char infmname[8]; 93 char infmtype[4]; 94 char infmmanu[16]; 95 char infmseq[16]; 96 char infmpman[4]; 97 u8 reserved[4]; 98 } __packed; 99 100 struct par_sctn { 101 u8 infpflg1; 102 u8 infpflg2; /* reserved */ 103 u8 infpval1; 104 u8 infpval2; /* reserved */ 105 u16 infppnum; 106 u16 infpscps; 107 u16 infpdcps; 108 u16 infpsifl; 109 u16 infpdifl; 110 u16 reserved; 111 char infppnam[8]; 112 u32 infpwbcp; 113 u32 infpabcp; 114 u32 infpwbif; 115 u32 infpabif; 116 char infplgnm[8]; 117 u32 infplgcp; 118 u32 infplgif; 119 } __packed; 120 121 struct sthyi_sctns { 122 struct hdr_sctn hdr; 123 struct mac_sctn mac; 124 struct par_sctn par; 125 } __packed; 126 127 struct cpu_inf { 128 u64 lpar_cap; 129 u64 lpar_grp_cap; 130 u64 lpar_weight; 131 u64 all_weight; 132 int cpu_num_ded; 133 int cpu_num_shd; 134 }; 135 136 struct lpar_cpu_inf { 137 struct cpu_inf cp; 138 struct cpu_inf ifl; 139 }; 140 141 /* 142 * STHYI requires extensive locking in the higher hypervisors 143 * and is very computational/memory expensive. Therefore we 144 * cache the retrieved data whose valid period is 1s. 145 */ 146 #define CACHE_VALID_JIFFIES HZ 147 148 struct sthyi_info { 149 void *info; 150 unsigned long end; 151 }; 152 153 static DEFINE_MUTEX(sthyi_mutex); 154 static struct sthyi_info sthyi_cache; 155 156 static inline u64 cpu_id(u8 ctidx, void *diag224_buf) 157 { 158 return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); 159 } 160 161 /* 162 * Scales the cpu capping from the lpar range to the one expected in 163 * sthyi data. 164 * 165 * diag204 reports a cap in hundredths of processor units. 166 * z/VM's range for one core is 0 - 0x10000. 167 */ 168 static u32 scale_cap(u32 in) 169 { 170 return (0x10000 * in) / 100; 171 } 172 173 static void fill_hdr(struct sthyi_sctns *sctns) 174 { 175 sctns->hdr.infhdln = sizeof(sctns->hdr); 176 sctns->hdr.infmoff = sizeof(sctns->hdr); 177 sctns->hdr.infmlen = sizeof(sctns->mac); 178 sctns->hdr.infplen = sizeof(sctns->par); 179 sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; 180 sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; 181 } 182 183 static void fill_stsi_mac(struct sthyi_sctns *sctns, 184 struct sysinfo_1_1_1 *sysinfo) 185 { 186 sclp_ocf_cpc_name_copy(sctns->mac.infmname); 187 if (*(u64 *)sctns->mac.infmname != 0) 188 sctns->mac.infmval1 |= MAC_NAME_VLD; 189 190 if (stsi(sysinfo, 1, 1, 1)) 191 return; 192 193 memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); 194 memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); 195 memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); 196 memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); 197 198 sctns->mac.infmval1 |= MAC_ID_VLD; 199 } 200 201 static void fill_stsi_par(struct sthyi_sctns *sctns, 202 struct sysinfo_2_2_2 *sysinfo) 203 { 204 if (stsi(sysinfo, 2, 2, 2)) 205 return; 206 207 sctns->par.infppnum = sysinfo->lpar_number; 208 memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); 209 210 sctns->par.infpval1 |= PAR_ID_VLD; 211 } 212 213 static void fill_stsi(struct sthyi_sctns *sctns) 214 { 215 void *sysinfo; 216 217 /* Errors are handled through the validity bits in the response. */ 218 sysinfo = (void *)__get_free_page(GFP_KERNEL); 219 if (!sysinfo) 220 return; 221 222 fill_stsi_mac(sctns, sysinfo); 223 fill_stsi_par(sctns, sysinfo); 224 225 free_pages((unsigned long)sysinfo, 0); 226 } 227 228 static void fill_diag_mac(struct sthyi_sctns *sctns, 229 struct diag204_x_phys_block *block, 230 void *diag224_buf) 231 { 232 int i; 233 234 for (i = 0; i < block->hdr.cpus; i++) { 235 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 236 case CP: 237 if (block->cpus[i].weight == DED_WEIGHT) 238 sctns->mac.infmdcps++; 239 else 240 sctns->mac.infmscps++; 241 break; 242 case IFL: 243 if (block->cpus[i].weight == DED_WEIGHT) 244 sctns->mac.infmdifl++; 245 else 246 sctns->mac.infmsifl++; 247 break; 248 } 249 } 250 sctns->mac.infmval1 |= MAC_CNT_VLD; 251 } 252 253 /* Returns a pointer to the the next partition block. */ 254 static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, 255 bool this_lpar, 256 void *diag224_buf, 257 struct diag204_x_part_block *block) 258 { 259 int i, capped = 0, weight_cp = 0, weight_ifl = 0; 260 struct cpu_inf *cpu_inf; 261 262 for (i = 0; i < block->hdr.rcpus; i++) { 263 if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) 264 continue; 265 266 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 267 case CP: 268 cpu_inf = &part_inf->cp; 269 if (block->cpus[i].cur_weight < DED_WEIGHT) 270 weight_cp |= block->cpus[i].cur_weight; 271 break; 272 case IFL: 273 cpu_inf = &part_inf->ifl; 274 if (block->cpus[i].cur_weight < DED_WEIGHT) 275 weight_ifl |= block->cpus[i].cur_weight; 276 break; 277 default: 278 continue; 279 } 280 281 if (!this_lpar) 282 continue; 283 284 capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; 285 cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; 286 cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; 287 288 if (block->cpus[i].weight == DED_WEIGHT) 289 cpu_inf->cpu_num_ded += 1; 290 else 291 cpu_inf->cpu_num_shd += 1; 292 } 293 294 if (this_lpar && capped) { 295 part_inf->cp.lpar_weight = weight_cp; 296 part_inf->ifl.lpar_weight = weight_ifl; 297 } 298 part_inf->cp.all_weight += weight_cp; 299 part_inf->ifl.all_weight += weight_ifl; 300 return (struct diag204_x_part_block *)&block->cpus[i]; 301 } 302 303 static void fill_diag(struct sthyi_sctns *sctns) 304 { 305 int i, r, pages; 306 bool this_lpar; 307 void *diag204_buf; 308 void *diag224_buf = NULL; 309 struct diag204_x_info_blk_hdr *ti_hdr; 310 struct diag204_x_part_block *part_block; 311 struct diag204_x_phys_block *phys_block; 312 struct lpar_cpu_inf lpar_inf = {}; 313 314 /* Errors are handled through the validity bits in the response. */ 315 pages = diag204((unsigned long)DIAG204_SUBC_RSI | 316 (unsigned long)DIAG204_INFO_EXT, 0, NULL); 317 if (pages <= 0) 318 return; 319 320 diag204_buf = vmalloc(array_size(pages, PAGE_SIZE)); 321 if (!diag204_buf) 322 return; 323 324 r = diag204((unsigned long)DIAG204_SUBC_STIB7 | 325 (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); 326 if (r < 0) 327 goto out; 328 329 diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); 330 if (!diag224_buf || diag224(diag224_buf)) 331 goto out; 332 333 ti_hdr = diag204_buf; 334 part_block = diag204_buf + sizeof(*ti_hdr); 335 336 for (i = 0; i < ti_hdr->npar; i++) { 337 /* 338 * For the calling lpar we also need to get the cpu 339 * caps and weights. The time information block header 340 * specifies the offset to the partition block of the 341 * caller lpar, so we know when we process its data. 342 */ 343 this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; 344 part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, 345 part_block); 346 } 347 348 phys_block = (struct diag204_x_phys_block *)part_block; 349 part_block = diag204_buf + ti_hdr->this_part; 350 if (part_block->hdr.mtid) 351 sctns->par.infpflg1 = PAR_MT_EN; 352 353 sctns->par.infpval1 |= PAR_GRP_VLD; 354 sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); 355 sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); 356 memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, 357 sizeof(sctns->par.infplgnm)); 358 359 sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; 360 sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; 361 sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; 362 sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; 363 sctns->par.infpval1 |= PAR_PCNT_VLD; 364 365 sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); 366 sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); 367 sctns->par.infpval1 |= PAR_ABS_VLD; 368 369 /* 370 * Everything below needs global performance data to be 371 * meaningful. 372 */ 373 if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { 374 sctns->hdr.infhflg1 |= HDR_PERF_UNAV; 375 goto out; 376 } 377 378 fill_diag_mac(sctns, phys_block, diag224_buf); 379 380 if (lpar_inf.cp.lpar_weight) { 381 sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * 382 lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; 383 } 384 385 if (lpar_inf.ifl.lpar_weight) { 386 sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * 387 lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; 388 } 389 sctns->par.infpval1 |= PAR_WGHT_VLD; 390 391 out: 392 free_page((unsigned long)diag224_buf); 393 vfree(diag204_buf); 394 } 395 396 static int sthyi(u64 vaddr, u64 *rc) 397 { 398 union register_pair r1 = { .even = 0, }; /* subcode */ 399 union register_pair r2 = { .even = vaddr, }; 400 int cc; 401 402 asm volatile( 403 ".insn rre,0xB2560000,%[r1],%[r2]\n" 404 "ipm %[cc]\n" 405 "srl %[cc],28\n" 406 : [cc] "=&d" (cc), [r2] "+&d" (r2.pair) 407 : [r1] "d" (r1.pair) 408 : "memory", "cc"); 409 *rc = r2.odd; 410 return cc; 411 } 412 413 static int fill_dst(void *dst, u64 *rc) 414 { 415 struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; 416 417 /* 418 * If the facility is on, we don't want to emulate the instruction. 419 * We ask the hypervisor to provide the data. 420 */ 421 if (test_facility(74)) 422 return sthyi((u64)dst, rc); 423 424 fill_hdr(sctns); 425 fill_stsi(sctns); 426 fill_diag(sctns); 427 *rc = 0; 428 return 0; 429 } 430 431 static int sthyi_init_cache(void) 432 { 433 if (sthyi_cache.info) 434 return 0; 435 sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); 436 if (!sthyi_cache.info) 437 return -ENOMEM; 438 sthyi_cache.end = jiffies - 1; /* expired */ 439 return 0; 440 } 441 442 static int sthyi_update_cache(u64 *rc) 443 { 444 int r; 445 446 memset(sthyi_cache.info, 0, PAGE_SIZE); 447 r = fill_dst(sthyi_cache.info, rc); 448 if (r) 449 return r; 450 sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; 451 return r; 452 } 453 454 /* 455 * sthyi_fill - Fill page with data returned by the STHYI instruction 456 * 457 * @dst: Pointer to zeroed page 458 * @rc: Pointer for storing the return code of the instruction 459 * 460 * Fills the destination with system information returned by the STHYI 461 * instruction. The data is generated by emulation or execution of STHYI, 462 * if available. The return value is either a negative error value or 463 * the condition code that would be returned, the rc parameter is the 464 * return code which is passed in register R2 + 1. 465 */ 466 int sthyi_fill(void *dst, u64 *rc) 467 { 468 int r; 469 470 mutex_lock(&sthyi_mutex); 471 r = sthyi_init_cache(); 472 if (r) 473 goto out; 474 475 if (time_is_before_jiffies(sthyi_cache.end)) { 476 /* cache expired */ 477 r = sthyi_update_cache(rc); 478 if (r) 479 goto out; 480 } 481 *rc = 0; 482 memcpy(dst, sthyi_cache.info, PAGE_SIZE); 483 out: 484 mutex_unlock(&sthyi_mutex); 485 return r; 486 } 487 EXPORT_SYMBOL_GPL(sthyi_fill); 488 489 SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, 490 u64 __user *, return_code, unsigned long, flags) 491 { 492 u64 sthyi_rc; 493 void *info; 494 int r; 495 496 if (flags) 497 return -EINVAL; 498 if (function_code != STHYI_FC_CP_IFL_CAP) 499 return -EOPNOTSUPP; 500 info = (void *)get_zeroed_page(GFP_KERNEL); 501 if (!info) 502 return -ENOMEM; 503 r = sthyi_fill(info, &sthyi_rc); 504 if (r < 0) 505 goto out; 506 if (return_code && put_user(sthyi_rc, return_code)) { 507 r = -EFAULT; 508 goto out; 509 } 510 if (copy_to_user(buffer, info, PAGE_SIZE)) 511 r = -EFAULT; 512 out: 513 free_page((unsigned long)info); 514 return r; 515 } 516