1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * store hypervisor information instruction emulation functions. 4 * 5 * Copyright IBM Corp. 2016 6 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> 7 */ 8 #include <linux/errno.h> 9 #include <linux/pagemap.h> 10 #include <linux/vmalloc.h> 11 #include <linux/syscalls.h> 12 #include <linux/mutex.h> 13 #include <asm/asm-offsets.h> 14 #include <asm/sclp.h> 15 #include <asm/diag.h> 16 #include <asm/sysinfo.h> 17 #include <asm/ebcdic.h> 18 #include <asm/facility.h> 19 #include <asm/sthyi.h> 20 #include "entry.h" 21 22 #define DED_WEIGHT 0xffff 23 /* 24 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string 25 * as they are justified with spaces. 26 */ 27 #define CP 0xc3d7404040404040UL 28 #define IFL 0xc9c6d34040404040UL 29 30 enum hdr_flags { 31 HDR_NOT_LPAR = 0x10, 32 HDR_STACK_INCM = 0x20, 33 HDR_STSI_UNAV = 0x40, 34 HDR_PERF_UNAV = 0x80, 35 }; 36 37 enum mac_validity { 38 MAC_NAME_VLD = 0x20, 39 MAC_ID_VLD = 0x40, 40 MAC_CNT_VLD = 0x80, 41 }; 42 43 enum par_flag { 44 PAR_MT_EN = 0x80, 45 }; 46 47 enum par_validity { 48 PAR_GRP_VLD = 0x08, 49 PAR_ID_VLD = 0x10, 50 PAR_ABS_VLD = 0x20, 51 PAR_WGHT_VLD = 0x40, 52 PAR_PCNT_VLD = 0x80, 53 }; 54 55 struct hdr_sctn { 56 u8 infhflg1; 57 u8 infhflg2; /* reserved */ 58 u8 infhval1; /* reserved */ 59 u8 infhval2; /* reserved */ 60 u8 reserved[3]; 61 u8 infhygct; 62 u16 infhtotl; 63 u16 infhdln; 64 u16 infmoff; 65 u16 infmlen; 66 u16 infpoff; 67 u16 infplen; 68 u16 infhoff1; 69 u16 infhlen1; 70 u16 infgoff1; 71 u16 infglen1; 72 u16 infhoff2; 73 u16 infhlen2; 74 u16 infgoff2; 75 u16 infglen2; 76 u16 infhoff3; 77 u16 infhlen3; 78 u16 infgoff3; 79 u16 infglen3; 80 u8 reserved2[4]; 81 } __packed; 82 83 struct mac_sctn { 84 u8 infmflg1; /* reserved */ 85 u8 infmflg2; /* reserved */ 86 u8 infmval1; 87 u8 infmval2; /* reserved */ 88 u16 infmscps; 89 u16 infmdcps; 90 u16 infmsifl; 91 u16 infmdifl; 92 char infmname[8]; 93 char infmtype[4]; 94 char infmmanu[16]; 95 char infmseq[16]; 96 char infmpman[4]; 97 u8 reserved[4]; 98 } __packed; 99 100 struct par_sctn { 101 u8 infpflg1; 102 u8 infpflg2; /* reserved */ 103 u8 infpval1; 104 u8 infpval2; /* reserved */ 105 u16 infppnum; 106 u16 infpscps; 107 u16 infpdcps; 108 u16 infpsifl; 109 u16 infpdifl; 110 u16 reserved; 111 char infppnam[8]; 112 u32 infpwbcp; 113 u32 infpabcp; 114 u32 infpwbif; 115 u32 infpabif; 116 char infplgnm[8]; 117 u32 infplgcp; 118 u32 infplgif; 119 } __packed; 120 121 struct sthyi_sctns { 122 struct hdr_sctn hdr; 123 struct mac_sctn mac; 124 struct par_sctn par; 125 } __packed; 126 127 struct cpu_inf { 128 u64 lpar_cap; 129 u64 lpar_grp_cap; 130 u64 lpar_weight; 131 u64 all_weight; 132 int cpu_num_ded; 133 int cpu_num_shd; 134 }; 135 136 struct lpar_cpu_inf { 137 struct cpu_inf cp; 138 struct cpu_inf ifl; 139 }; 140 141 /* 142 * STHYI requires extensive locking in the higher hypervisors 143 * and is very computational/memory expensive. Therefore we 144 * cache the retrieved data whose valid period is 1s. 145 */ 146 #define CACHE_VALID_JIFFIES HZ 147 148 struct sthyi_info { 149 void *info; 150 unsigned long end; 151 }; 152 153 static DEFINE_MUTEX(sthyi_mutex); 154 static struct sthyi_info sthyi_cache; 155 156 static inline u64 cpu_id(u8 ctidx, void *diag224_buf) 157 { 158 return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); 159 } 160 161 /* 162 * Scales the cpu capping from the lpar range to the one expected in 163 * sthyi data. 164 * 165 * diag204 reports a cap in hundredths of processor units. 166 * z/VM's range for one core is 0 - 0x10000. 167 */ 168 static u32 scale_cap(u32 in) 169 { 170 return (0x10000 * in) / 100; 171 } 172 173 static void fill_hdr(struct sthyi_sctns *sctns) 174 { 175 sctns->hdr.infhdln = sizeof(sctns->hdr); 176 sctns->hdr.infmoff = sizeof(sctns->hdr); 177 sctns->hdr.infmlen = sizeof(sctns->mac); 178 sctns->hdr.infplen = sizeof(sctns->par); 179 sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; 180 sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; 181 } 182 183 static void fill_stsi_mac(struct sthyi_sctns *sctns, 184 struct sysinfo_1_1_1 *sysinfo) 185 { 186 if (stsi(sysinfo, 1, 1, 1)) 187 return; 188 189 sclp_ocf_cpc_name_copy(sctns->mac.infmname); 190 191 memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); 192 memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); 193 memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); 194 memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); 195 196 sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD; 197 } 198 199 static void fill_stsi_par(struct sthyi_sctns *sctns, 200 struct sysinfo_2_2_2 *sysinfo) 201 { 202 if (stsi(sysinfo, 2, 2, 2)) 203 return; 204 205 sctns->par.infppnum = sysinfo->lpar_number; 206 memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); 207 208 sctns->par.infpval1 |= PAR_ID_VLD; 209 } 210 211 static void fill_stsi(struct sthyi_sctns *sctns) 212 { 213 void *sysinfo; 214 215 /* Errors are handled through the validity bits in the response. */ 216 sysinfo = (void *)__get_free_page(GFP_KERNEL); 217 if (!sysinfo) 218 return; 219 220 fill_stsi_mac(sctns, sysinfo); 221 fill_stsi_par(sctns, sysinfo); 222 223 free_pages((unsigned long)sysinfo, 0); 224 } 225 226 static void fill_diag_mac(struct sthyi_sctns *sctns, 227 struct diag204_x_phys_block *block, 228 void *diag224_buf) 229 { 230 int i; 231 232 for (i = 0; i < block->hdr.cpus; i++) { 233 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 234 case CP: 235 if (block->cpus[i].weight == DED_WEIGHT) 236 sctns->mac.infmdcps++; 237 else 238 sctns->mac.infmscps++; 239 break; 240 case IFL: 241 if (block->cpus[i].weight == DED_WEIGHT) 242 sctns->mac.infmdifl++; 243 else 244 sctns->mac.infmsifl++; 245 break; 246 } 247 } 248 sctns->mac.infmval1 |= MAC_CNT_VLD; 249 } 250 251 /* Returns a pointer to the the next partition block. */ 252 static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, 253 bool this_lpar, 254 void *diag224_buf, 255 struct diag204_x_part_block *block) 256 { 257 int i, capped = 0, weight_cp = 0, weight_ifl = 0; 258 struct cpu_inf *cpu_inf; 259 260 for (i = 0; i < block->hdr.rcpus; i++) { 261 if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) 262 continue; 263 264 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 265 case CP: 266 cpu_inf = &part_inf->cp; 267 if (block->cpus[i].cur_weight < DED_WEIGHT) 268 weight_cp |= block->cpus[i].cur_weight; 269 break; 270 case IFL: 271 cpu_inf = &part_inf->ifl; 272 if (block->cpus[i].cur_weight < DED_WEIGHT) 273 weight_ifl |= block->cpus[i].cur_weight; 274 break; 275 default: 276 continue; 277 } 278 279 if (!this_lpar) 280 continue; 281 282 capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; 283 cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; 284 cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; 285 286 if (block->cpus[i].weight == DED_WEIGHT) 287 cpu_inf->cpu_num_ded += 1; 288 else 289 cpu_inf->cpu_num_shd += 1; 290 } 291 292 if (this_lpar && capped) { 293 part_inf->cp.lpar_weight = weight_cp; 294 part_inf->ifl.lpar_weight = weight_ifl; 295 } 296 part_inf->cp.all_weight += weight_cp; 297 part_inf->ifl.all_weight += weight_ifl; 298 return (struct diag204_x_part_block *)&block->cpus[i]; 299 } 300 301 static void fill_diag(struct sthyi_sctns *sctns) 302 { 303 int i, r, pages; 304 bool this_lpar; 305 void *diag204_buf; 306 void *diag224_buf = NULL; 307 struct diag204_x_info_blk_hdr *ti_hdr; 308 struct diag204_x_part_block *part_block; 309 struct diag204_x_phys_block *phys_block; 310 struct lpar_cpu_inf lpar_inf = {}; 311 312 /* Errors are handled through the validity bits in the response. */ 313 pages = diag204((unsigned long)DIAG204_SUBC_RSI | 314 (unsigned long)DIAG204_INFO_EXT, 0, NULL); 315 if (pages <= 0) 316 return; 317 318 diag204_buf = vmalloc(array_size(pages, PAGE_SIZE)); 319 if (!diag204_buf) 320 return; 321 322 r = diag204((unsigned long)DIAG204_SUBC_STIB7 | 323 (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); 324 if (r < 0) 325 goto out; 326 327 diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); 328 if (!diag224_buf || diag224(diag224_buf)) 329 goto out; 330 331 ti_hdr = diag204_buf; 332 part_block = diag204_buf + sizeof(*ti_hdr); 333 334 for (i = 0; i < ti_hdr->npar; i++) { 335 /* 336 * For the calling lpar we also need to get the cpu 337 * caps and weights. The time information block header 338 * specifies the offset to the partition block of the 339 * caller lpar, so we know when we process its data. 340 */ 341 this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; 342 part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, 343 part_block); 344 } 345 346 phys_block = (struct diag204_x_phys_block *)part_block; 347 part_block = diag204_buf + ti_hdr->this_part; 348 if (part_block->hdr.mtid) 349 sctns->par.infpflg1 = PAR_MT_EN; 350 351 sctns->par.infpval1 |= PAR_GRP_VLD; 352 sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); 353 sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); 354 memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, 355 sizeof(sctns->par.infplgnm)); 356 357 sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; 358 sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; 359 sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; 360 sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; 361 sctns->par.infpval1 |= PAR_PCNT_VLD; 362 363 sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); 364 sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); 365 sctns->par.infpval1 |= PAR_ABS_VLD; 366 367 /* 368 * Everything below needs global performance data to be 369 * meaningful. 370 */ 371 if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { 372 sctns->hdr.infhflg1 |= HDR_PERF_UNAV; 373 goto out; 374 } 375 376 fill_diag_mac(sctns, phys_block, diag224_buf); 377 378 if (lpar_inf.cp.lpar_weight) { 379 sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * 380 lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; 381 } 382 383 if (lpar_inf.ifl.lpar_weight) { 384 sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * 385 lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; 386 } 387 sctns->par.infpval1 |= PAR_WGHT_VLD; 388 389 out: 390 free_page((unsigned long)diag224_buf); 391 vfree(diag204_buf); 392 } 393 394 static int sthyi(u64 vaddr, u64 *rc) 395 { 396 register u64 code asm("0") = 0; 397 register u64 addr asm("2") = vaddr; 398 register u64 rcode asm("3"); 399 int cc; 400 401 asm volatile( 402 ".insn rre,0xB2560000,%[code],%[addr]\n" 403 "ipm %[cc]\n" 404 "srl %[cc],28\n" 405 : [cc] "=d" (cc), "=d" (rcode) 406 : [code] "d" (code), [addr] "a" (addr) 407 : "memory", "cc"); 408 *rc = rcode; 409 return cc; 410 } 411 412 static int fill_dst(void *dst, u64 *rc) 413 { 414 struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; 415 416 /* 417 * If the facility is on, we don't want to emulate the instruction. 418 * We ask the hypervisor to provide the data. 419 */ 420 if (test_facility(74)) 421 return sthyi((u64)dst, rc); 422 423 fill_hdr(sctns); 424 fill_stsi(sctns); 425 fill_diag(sctns); 426 *rc = 0; 427 return 0; 428 } 429 430 static int sthyi_init_cache(void) 431 { 432 if (sthyi_cache.info) 433 return 0; 434 sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); 435 if (!sthyi_cache.info) 436 return -ENOMEM; 437 sthyi_cache.end = jiffies - 1; /* expired */ 438 return 0; 439 } 440 441 static int sthyi_update_cache(u64 *rc) 442 { 443 int r; 444 445 memset(sthyi_cache.info, 0, PAGE_SIZE); 446 r = fill_dst(sthyi_cache.info, rc); 447 if (r) 448 return r; 449 sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; 450 return r; 451 } 452 453 /* 454 * sthyi_fill - Fill page with data returned by the STHYI instruction 455 * 456 * @dst: Pointer to zeroed page 457 * @rc: Pointer for storing the return code of the instruction 458 * 459 * Fills the destination with system information returned by the STHYI 460 * instruction. The data is generated by emulation or execution of STHYI, 461 * if available. The return value is the condition code that would be 462 * returned, the rc parameter is the return code which is passed in 463 * register R2 + 1. 464 */ 465 int sthyi_fill(void *dst, u64 *rc) 466 { 467 int r; 468 469 mutex_lock(&sthyi_mutex); 470 r = sthyi_init_cache(); 471 if (r) 472 goto out; 473 474 if (time_is_before_jiffies(sthyi_cache.end)) { 475 /* cache expired */ 476 r = sthyi_update_cache(rc); 477 if (r) 478 goto out; 479 } 480 *rc = 0; 481 memcpy(dst, sthyi_cache.info, PAGE_SIZE); 482 out: 483 mutex_unlock(&sthyi_mutex); 484 return r; 485 } 486 EXPORT_SYMBOL_GPL(sthyi_fill); 487 488 SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, 489 u64 __user *, return_code, unsigned long, flags) 490 { 491 u64 sthyi_rc; 492 void *info; 493 int r; 494 495 if (flags) 496 return -EINVAL; 497 if (function_code != STHYI_FC_CP_IFL_CAP) 498 return -EOPNOTSUPP; 499 info = (void *)get_zeroed_page(GFP_KERNEL); 500 if (!info) 501 return -ENOMEM; 502 r = sthyi_fill(info, &sthyi_rc); 503 if (r < 0) 504 goto out; 505 if (return_code && put_user(sthyi_rc, return_code)) { 506 r = -EFAULT; 507 goto out; 508 } 509 if (copy_to_user(buffer, info, PAGE_SIZE)) 510 r = -EFAULT; 511 out: 512 free_page((unsigned long)info); 513 return r; 514 } 515