/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_RESCTRL_INTERNAL_H
#define _ASM_X86_RESCTRL_INTERNAL_H

#include <linux/resctrl.h>
#include <linux/sched.h>
#include <linux/kernfs.h>
#include <linux/fs_context.h>
#include <linux/jump_label.h>
#include <linux/tick.h>

#include <asm/resctrl.h>

#define L3_QOS_CDP_ENABLE		0x01ULL

#define L2_QOS_CDP_ENABLE		0x01ULL

#define CQM_LIMBOCHECK_INTERVAL		1000

#define MBM_CNTR_WIDTH_BASE		24
#define MBM_OVERFLOW_INTERVAL		1000
#define MAX_MBA_BW			100u
#define MBA_IS_LINEAR			0x4
#define MBM_CNTR_WIDTH_OFFSET_AMD	20

#define RMID_VAL_ERROR			BIT_ULL(63)
#define RMID_VAL_UNAVAIL		BIT_ULL(62)
/*
 * With the above fields in use, 62 bits remain in MSR_IA32_QM_CTR for
 * data to be returned. The counter width is discovered from the hardware
 * as an offset from MBM_CNTR_WIDTH_BASE.
 */
#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
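/*
 * Illustrative sketch (hypothetical helper, not part of this header): the
 * effective MBM counter width is MBM_CNTR_WIDTH_BASE plus the offset the
 * hardware reports, so an offset of 8 yields a 24 + 8 = 32 bit counter.
 * Clamping to MBM_CNTR_WIDTH_OFFSET_MAX (62 - 24 = 38) keeps the value
 * bits clear of the two flag bits defined above.
 */
static inline unsigned int example_mbm_cntr_width(unsigned int offset)
{
	if (offset > MBM_CNTR_WIDTH_OFFSET_MAX)
		offset = MBM_CNTR_WIDTH_OFFSET_MAX;

	return MBM_CNTR_WIDTH_BASE + offset;
}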
/**
 * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those
 *				that aren't marked nohz_full
 * @mask:	The mask to pick a CPU from.
 * @exclude_cpu: The CPU to avoid picking.
 *
 * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
 * CPUs that don't use nohz_full, these are preferred. Pass
 * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
 *
 * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
 */
static inline unsigned int
cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
{
	unsigned int cpu, hk_cpu;

	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
		cpu = cpumask_any(mask);
	else
		cpu = cpumask_any_but(mask, exclude_cpu);

	/* Only continue if tick_nohz_full_mask has been initialized. */
	if (!tick_nohz_full_enabled())
		return cpu;

	/* If the CPU picked isn't marked nohz_full, nothing more needs doing. */
	if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu))
		return cpu;

	/* Try to find a CPU that isn't nohz_full to use in preference. */
	hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
	if (hk_cpu == exclude_cpu)
		hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);

	if (hk_cpu < nr_cpu_ids)
		cpu = hk_cpu;

	return cpu;
}

struct rdt_fs_context {
	struct kernfs_fs_context	kfc;
	bool				enable_cdpl2;
	bool				enable_cdpl3;
	bool				enable_mba_mbps;
	bool				enable_debug;
};

static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
{
	struct kernfs_fs_context *kfc = fc->fs_private;

	return container_of(kfc, struct rdt_fs_context, kfc);
}

/**
 * struct mon_evt - Entry in the event list of a resource
 * @evtid:		event id
 * @name:		name of the event
 * @configurable:	true if the event is configurable
 * @list:		entry in &rdt_resource->evt_list
 */
struct mon_evt {
	enum resctrl_event_id	evtid;
	char			*name;
	bool			configurable;
	struct list_head	list;
};

/**
 * union mon_data_bits - Monitoring details for each event file.
 * @priv:	Used to store monitoring event data in @u
 *		as kernfs private data.
 * @u.rid:	Resource id associated with the event file.
 * @u.evtid:	Event id associated with the event file.
 * @u.sum:	Set when event must be summed across multiple
 *		domains.
 * @u.domid:	When @u.sum is zero this is the domain to which
 *		the event file belongs. When @u.sum is one this
 *		is the id of the L3 cache that all domains to be
 *		summed share.
 * @u:		Name of the bit fields struct.
 */
union mon_data_bits {
	void *priv;
	struct {
		unsigned int rid		: 10;
		enum resctrl_event_id evtid	: 7;
		unsigned int sum		: 1;
		unsigned int domid		: 14;
	} u;
};
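/*
 * Illustrative sketch (hypothetical helper, not part of this header): pack
 * the identity of a monitor event file into the pointer-sized kernfs
 * private data via union mon_data_bits. QOS_L3_OCCUP_EVENT_ID is assumed
 * to be one of the enum resctrl_event_id values; the reverse trip is
 * "md.priv = of->kn->priv" in the show() path.
 */
static inline void *example_encode_mon_data(unsigned int rid,
					    unsigned int domid)
{
	union mon_data_bits md = { NULL };

	md.u.rid = rid;
	md.u.evtid = QOS_L3_OCCUP_EVENT_ID;
	md.u.sum = 0;			/* single domain, not a sum file */
	md.u.domid = domid;

	return md.priv;			/* stored as kernfs_node->priv */
}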
/**
 * struct rmid_read - Data passed across smp_call*() to read event count.
 * @rgrp:  Resource group for which the counter is being read. If it is a parent
 *	   resource group then its event count is summed with the count from all
 *	   its child resource groups.
 * @r:	   Resource describing the properties of the event being read.
 * @d:	   Domain that the counter should be read from. If NULL then sum all
 *	   domains in @r sharing L3 @ci.id
 * @evtid: Which monitor event to read.
 * @first: Initialize MBM counter when true.
 * @ci:	   Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
 * @err:   Error encountered when reading counter.
 * @val:   Returned value of event counter. If @rgrp is a parent resource group,
 *	   @val includes the sum of event counts from its child resource groups.
 *	   If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id
 *	   (summed across child resource groups if @rgrp is a parent resource group).
 * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only).
 */
struct rmid_read {
	struct rdtgroup		*rgrp;
	struct rdt_resource	*r;
	struct rdt_mon_domain	*d;
	enum resctrl_event_id	evtid;
	bool			first;
	struct cacheinfo	*ci;
	int			err;
	u64			val;
	void			*arch_mon_ctx;
};

extern struct list_head resctrl_schema_all;
extern bool resctrl_mounted;

enum rdt_group_type {
	RDTCTRL_GROUP = 0,
	RDTMON_GROUP,
	RDT_NUM_GROUP,
};

/**
 * enum rdtgrp_mode - Mode of an RDT resource group
 * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
 * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
 * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
 * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
 *			    allowed AND the allocations are Cache Pseudo-Locked
 * @RDT_NUM_MODES: Total number of modes
 *
 * The mode of a resource group enables control over the allowed overlap
 * between allocations associated with different resource groups (classes
 * of service). The user is able to modify the mode of a resource group by
 * writing to the "mode" resctrl file associated with the resource group.
 *
 * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
 * writing the appropriate text to the "mode" file. A resource group enters
 * "pseudo-locked" mode after the schemata is written while the resource
 * group is in "pseudo-locksetup" mode.
 */
enum rdtgrp_mode {
	RDT_MODE_SHAREABLE = 0,
	RDT_MODE_EXCLUSIVE,
	RDT_MODE_PSEUDO_LOCKSETUP,
	RDT_MODE_PSEUDO_LOCKED,

	/* Must be last */
	RDT_NUM_MODES,
};

/**
 * struct mongroup - store mon group's data in resctrl fs.
 * @mon_data_kn:	kernfs node for the mon_data directory
 * @parent:		parent rdtgrp
 * @crdtgrp_list:	child rdtgroup node list
 * @rmid:		rmid for this rdtgroup
 */
struct mongroup {
	struct kernfs_node	*mon_data_kn;
	struct rdtgroup		*parent;
	struct list_head	crdtgrp_list;
	u32			rmid;
};

/**
 * struct rdtgroup - store rdtgroup's data in resctrl file system.
 * @kn:			kernfs node
 * @rdtgroup_list:	linked list for all rdtgroups
 * @closid:		closid for this rdtgroup
 * @cpu_mask:		CPUs assigned to this rdtgroup
 * @flags:		status bits
 * @waitcount:		how many CPUs expect to find this
 *			group when they acquire rdtgroup_mutex
 * @type:		indicates type of this rdtgroup - either
 *			monitor only or ctrl_mon group
 * @mon:		mongroup related data
 * @mode:		mode of resource group
 * @mba_mbps_event:	input monitoring event id when mba_sc is enabled
 * @plr:		pseudo-locked region
 */
struct rdtgroup {
	struct kernfs_node		*kn;
	struct list_head		rdtgroup_list;
	u32				closid;
	struct cpumask			cpu_mask;
	int				flags;
	atomic_t			waitcount;
	enum rdt_group_type		type;
	struct mongroup			mon;
	enum rdtgrp_mode		mode;
	enum resctrl_event_id		mba_mbps_event;
	struct pseudo_lock_region	*plr;
};

/* rdtgroup.flags */
#define RDT_DELETED		1

/* rftype.flags */
#define RFTYPE_FLAGS_CPUS_LIST	1

/*
 * Define the file type flags for base and info directories.
 */
#define RFTYPE_INFO			BIT(0)
#define RFTYPE_BASE			BIT(1)
#define RFTYPE_CTRL			BIT(4)
#define RFTYPE_MON			BIT(5)
#define RFTYPE_TOP			BIT(6)
#define RFTYPE_RES_CACHE		BIT(8)
#define RFTYPE_RES_MB			BIT(9)
#define RFTYPE_DEBUG			BIT(10)
#define RFTYPE_CTRL_INFO		(RFTYPE_INFO | RFTYPE_CTRL)
#define RFTYPE_MON_INFO			(RFTYPE_INFO | RFTYPE_MON)
#define RFTYPE_TOP_INFO			(RFTYPE_INFO | RFTYPE_TOP)
#define RFTYPE_CTRL_BASE		(RFTYPE_BASE | RFTYPE_CTRL)
#define RFTYPE_MON_BASE			(RFTYPE_BASE | RFTYPE_MON)

/* List of all resource groups */
extern struct list_head rdt_all_groups;

extern int max_name_width;

/**
 * struct rftype - describe each file in the resctrl file system
 * @name:	File name
 * @mode:	Access mode
 * @kf_ops:	File operations
 * @flags:	File specific RFTYPE_FLAGS_* flags
 * @fflags:	File specific RFTYPE_* flags
 * @seq_show:	Show content of the file
 * @write:	Write to the file
 */
struct rftype {
	char			*name;
	umode_t			mode;
	const struct kernfs_ops	*kf_ops;
	unsigned long		flags;
	unsigned long		fflags;

	int (*seq_show)(struct kernfs_open_file *of,
			struct seq_file *sf, void *v);
	/*
	 * write() is the generic write callback which maps directly to
	 * kernfs write operation and overrides all other operations.
	 * Maximum write size is determined by ->max_write_len.
	 */
	ssize_t (*write)(struct kernfs_open_file *of,
			 char *buf, size_t nbytes, loff_t off);
};

/**
 * struct mbm_state - status for each MBM counter in each domain
 * @prev_bw_bytes:	Previous bytes value read for bandwidth calculation
 * @prev_bw:		The most recent bandwidth in MBps
 */
struct mbm_state {
	u64	prev_bw_bytes;
	u32	prev_bw;
};
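/*
 * Illustrative sketch (hypothetical helper, not the kernel's exact code):
 * bandwidth is the byte count moved since the previous poll, converted to
 * MBps. The overflow handler that drives this runs roughly every
 * MBM_OVERFLOW_INTERVAL (1000) milliseconds, so a delta over one interval
 * approximates bytes per second.
 */
static inline u32 example_update_bw(struct mbm_state *m, u64 cur_bytes)
{
	u64 delta = cur_bytes - m->prev_bw_bytes;

	m->prev_bw_bytes = cur_bytes;
	m->prev_bw = delta >> 20;	/* bytes per ~1s interval -> MBps */

	return m->prev_bw;
}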
/**
 * struct arch_mbm_state - values used to compute resctrl_arch_rmid_read()'s
 *			   return value.
 * @chunks:	Total data moved (multiply by rdt_hw_resource.mon_scale to get bytes)
 * @prev_msr:	Value of IA32_QM_CTR last time it was read for the RMID used to
 *		find this struct.
 */
struct arch_mbm_state {
	u64	chunks;
	u64	prev_msr;
};

/**
 * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
 *			       a resource for a control function
 * @d_resctrl:	Properties exposed to the resctrl file system
 * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
 *
 * Members of this structure are accessed via helpers that provide abstraction.
 */
struct rdt_hw_ctrl_domain {
	struct rdt_ctrl_domain		d_resctrl;
	u32				*ctrl_val;
};

/**
 * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share
 *			      a resource for a monitor function
 * @d_resctrl:		Properties exposed to the resctrl file system
 * @arch_mbm_total:	arch private state for MBM total bandwidth
 * @arch_mbm_local:	arch private state for MBM local bandwidth
 *
 * Members of this structure are accessed via helpers that provide abstraction.
 */
struct rdt_hw_mon_domain {
	struct rdt_mon_domain		d_resctrl;
	struct arch_mbm_state		*arch_mbm_total;
	struct arch_mbm_state		*arch_mbm_local;
};

static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r)
{
	return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl);
}

static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r)
{
	return container_of(r, struct rdt_hw_mon_domain, d_resctrl);
}

/**
 * struct msr_param - set a range of MSRs from a domain
 * @res:	The resource to use
 * @dom:	The domain to update
 * @low:	Beginning index from base MSR
 * @high:	End index (exclusive)
 */
struct msr_param {
	struct rdt_resource	*res;
	struct rdt_ctrl_domain	*dom;
	u32			low;
	u32			high;
};

/**
 * struct rdt_hw_resource - arch private attributes of a resctrl resource
 * @r_resctrl:		Attributes of the resource used directly by resctrl.
 * @num_closid:		Maximum number of closid this hardware can support,
 *			regardless of CDP. This is exposed via
 *			resctrl_arch_get_num_closid() to avoid confusion
 *			with struct resctrl_schema's property of the same name,
 *			which has been corrected for features like CDP.
 * @msr_base:		Base MSR address for CBMs
 * @msr_update:		Function pointer to update QOS MSRs
 * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
 * @mbm_width:		Monitor width, to detect and correct for overflow.
 * @cdp_enabled:	CDP state of this resource
 *
 * Members of this structure are either private to the architecture,
 * e.g. mbm_width, or accessed via helpers that provide abstraction,
 * e.g. msr_update and msr_base.
 */
struct rdt_hw_resource {
	struct rdt_resource	r_resctrl;
	u32			num_closid;
	unsigned int		msr_base;
	void			(*msr_update)(struct msr_param *m);
	unsigned int		mon_scale;
	unsigned int		mbm_width;
	bool			cdp_enabled;
};

static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
{
	return container_of(r, struct rdt_hw_resource, r_resctrl);
}
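/*
 * Illustrative sketch (hypothetical callback, not the kernel's
 * implementation): an msr_update() hook walks the half-open CLOSID index
 * range [low, high) staged in struct msr_param and writes each control
 * value into the MSR at the matching offset from msr_base. wrmsrl() is
 * assumed to be available via <asm/msr.h>.
 */
static inline void example_msr_update(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}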
extern struct mutex rdtgroup_mutex;

static inline const char *rdt_kn_name(const struct kernfs_node *kn)
{
	return rcu_dereference_check(kn->name, lockdep_is_held(&rdtgroup_mutex));
}

extern struct rdt_hw_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
extern struct dentry *debugfs_resctrl;
extern enum resctrl_event_id mba_mbps_default_event;

static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
{
	return rdt_resources_all[l].cdp_enabled;
}

int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);

void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d);

/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
union cpuid_0x10_1_eax {
	struct {
		unsigned int cbm_len:5;
	} split;
	unsigned int full;
};

/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
union cpuid_0x10_3_eax {
	struct {
		unsigned int max_delay:12;
	} split;
	unsigned int full;
};

/* CPUID.(EAX=10H, ECX=ResID).ECX */
union cpuid_0x10_x_ecx {
	struct {
		unsigned int reserved:3;
		unsigned int noncont:1;
	} split;
	unsigned int full;
};

/* CPUID.(EAX=10H, ECX=ResID).EDX */
union cpuid_0x10_x_edx {
	struct {
		unsigned int cos_max:16;
	} split;
	unsigned int full;
};
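/*
 * Illustrative sketch (hypothetical helper, mirrors how the unions above
 * decode CPUID leaf 0x10): probe the L3 cache-allocation properties.
 * Hardware reports cbm_len as one less than the capacity bitmask length,
 * and cos_max as one less than the number of CLOSIDs; cpuid_count() is
 * assumed to be available via the architecture's processor headers.
 */
static inline void example_probe_l3_cat(unsigned int *cbm_len,
					unsigned int *num_closid)
{
	union cpuid_0x10_1_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx;

	cpuid_count(0x10, 1, &eax.full, &ebx, &ecx, &edx.full);

	*cbm_len = eax.split.cbm_len + 1;
	*num_closid = edx.split.cos_max + 1;
}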
void rdt_last_cmd_clear(void);
void rdt_last_cmd_puts(const char *s);
__printf(1, 2)
void rdt_last_cmd_printf(const char *fmt, ...);

void rdt_ctrl_update(void *arg);
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
void rdtgroup_kn_unlock(struct kernfs_node *kn);
int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
			     umode_t mask);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
			   struct seq_file *s, void *v);
ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
				      char *buf, size_t nbytes, loff_t off);
int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
				 struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
			   unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
				  unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(u32 closid);
void free_rmid(u32 closid, u32 rmid);
int rdt_get_mon_l3_config(struct rdt_resource *r);
void resctrl_mon_resource_exit(void);
bool __init rdt_cpu_has(int flag);
void mon_event_count(void *info);
int rdtgroup_mondata_show(struct seq_file *m, void *arg);
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
		    struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
		    cpumask_t *cpumask, int evtid, int first);
int __init resctrl_mon_resource_init(void);
void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
				unsigned long delay_ms,
				int exclude_cpu);
void mbm_handle_overflow(struct work_struct *work);
void __init intel_rdt_mbm_apply_quirk(void);
bool is_mba_sc(struct rdt_resource *r);
void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
			     int exclude_cpu);
void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_mon_domain *d);
void __check_limbo(struct rdt_mon_domain *d, bool force_free);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
void resctrl_file_fflags_init(const char *config, unsigned long fflags);
void rdt_staged_configs_clear(void);
bool closid_allocated(unsigned int closid);
int resctrl_find_cleanest_closid(void);

#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
#else
static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
{
	return -EOPNOTSUPP;
}

static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
{
	return -EOPNOTSUPP;
}

static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
{
	return false;
}

static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
{
	return false;
}

static inline int rdt_pseudo_lock_init(void) { return 0; }
static inline void rdt_pseudo_lock_release(void) { }
static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
{
	return -EOPNOTSUPP;
}

static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { }
#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */

#endif /* _ASM_X86_RESCTRL_INTERNAL_H */