// SPDX-License-Identifier: GPL-2.0-only
/*
 * Housekeeping management. Manage the targets for routine code that can run on
 * any CPU: unbound workqueues, timers, kthreads and any offloadable work.
 *
 * Copyright (C) 2017 Red Hat, Inc., Frederic Weisbecker
 * Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
 *
 */
#include <linux/sched/isolation.h>
#include <linux/pci.h>
#include "sched.h"

/* Bit flags mirroring the HK_TYPE_* enum, for use in a single flags word. */
enum hk_flags {
	HK_FLAG_DOMAIN_BOOT	= BIT(HK_TYPE_DOMAIN_BOOT),
	HK_FLAG_DOMAIN		= BIT(HK_TYPE_DOMAIN),
	HK_FLAG_MANAGED_IRQ	= BIT(HK_TYPE_MANAGED_IRQ),
	HK_FLAG_KERNEL_NOISE	= BIT(HK_TYPE_KERNEL_NOISE),
};

/*
 * False by default: the fast paths below short-circuit to "all CPUs are
 * housekeeping" until some isolation boot parameter or runtime update
 * enables this key.
 */
DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
EXPORT_SYMBOL_GPL(housekeeping_overridden);

struct housekeeping {
	/* Per-type housekeeping masks, RCU-published so readers are lockless. */
	struct cpumask __rcu *cpumasks[HK_TYPE_MAX];
	/* OR of HK_FLAG_* for the types that currently have a mask set. */
	unsigned long flags;
};

static struct housekeeping housekeeping;

/*
 * Return true if housekeeping of the given type is in effect, i.e. some
 * CPUs have been excluded from it via boot parameters or a runtime update.
 */
bool housekeeping_enabled(enum hk_type type)
{
	return !!(READ_ONCE(housekeeping.flags) & BIT(type));
}
EXPORT_SYMBOL_GPL(housekeeping_enabled);

/*
 * Lockdep condition under which the HK_TYPE_DOMAIN mask may be dereferenced
 * outside an RCU read-side section: anything that prevents a concurrent
 * cpuset-driven rewrite of the mask qualifies.
 */
static bool housekeeping_dereference_check(enum hk_type type)
{
	if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) {
		/* Cpuset isn't even writable yet? */
		if (system_state <= SYSTEM_SCHEDULING)
			return true;

		/* CPU hotplug write locked, so cpuset partition can't be overwritten */
		if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
			return true;

		/* Cpuset lock held, partitions not writable */
		if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
			return true;

		return false;
	}

	/* All other types are only written at boot, before readers exist. */
	return true;
}

static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type)
{
	return rcu_dereference_all_check(housekeeping.cpumasks[type],
					 housekeeping_dereference_check(type));
}

/*
 * Return the housekeeping mask for @type, falling back to cpu_possible_mask
 * when no isolation of that type is active (or no mask has been published).
 */
const struct cpumask *housekeeping_cpumask(enum hk_type type)
{
	const struct cpumask *mask = NULL;

	if (static_branch_unlikely(&housekeeping_overridden)) {
		if (READ_ONCE(housekeeping.flags) & BIT(type))
			mask = housekeeping_cpumask_dereference(type);
	}
	if (!mask)
		mask = cpu_possible_mask;
	return mask;
}
EXPORT_SYMBOL_GPL(housekeeping_cpumask);

/*
 * Pick a housekeeping CPU for work of the given type, preferring one that
 * is NUMA-close to the current CPU. Falls back to the current CPU when
 * housekeeping isn't active or no online housekeeping CPU exists yet.
 */
int housekeeping_any_cpu(enum hk_type type)
{
	int cpu;

	if (static_branch_unlikely(&housekeeping_overridden)) {
		if (housekeeping.flags & BIT(type)) {
			cpu = sched_numa_find_closest(housekeeping_cpumask(type), smp_processor_id());
			if (cpu < nr_cpu_ids)
				return cpu;

			cpu = cpumask_any_and_distribute(housekeeping_cpumask(type), cpu_online_mask);
			if (likely(cpu < nr_cpu_ids))
				return cpu;
			/*
			 * Unless we have another problem this can only happen
			 * at boot time before start_secondary() brings the 1st
			 * housekeeping CPU up.
			 */
			WARN_ON_ONCE(system_state == SYSTEM_RUNNING ||
				     type != HK_TYPE_TIMER);
		}
	}
	return smp_processor_id();
}
EXPORT_SYMBOL_GPL(housekeeping_any_cpu);

/*
 * Restrict @t's affinity to the housekeeping CPUs of @type. No-op when
 * housekeeping of that type isn't active.
 */
void housekeeping_affine(struct task_struct *t, enum hk_type type)
{
	if (static_branch_unlikely(&housekeeping_overridden))
		if (housekeeping.flags & BIT(type))
			set_cpus_allowed_ptr(t, housekeeping_cpumask(type));
}
EXPORT_SYMBOL_GPL(housekeeping_affine);

/*
 * Return true if @cpu is a housekeeping CPU for @type. Trivially true when
 * no housekeeping of that type is active (every CPU does housekeeping).
 */
bool housekeeping_test_cpu(int cpu, enum hk_type type)
{
	if (static_branch_unlikely(&housekeeping_overridden) &&
	    READ_ONCE(housekeeping.flags) & BIT(type))
		return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
	return true;
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);

/*
 * Runtime update of the HK_TYPE_DOMAIN mask: the new housekeeping set is
 * the boot-time domain housekeeping CPUs minus @isol_mask. Publishes the
 * new mask via RCU, then propagates it to the subsystems that cache or
 * derive affinities from it (unbound workqueues, timer migration
 * hierarchy, kthreads).
 *
 * NOTE(review): dereferences the old HK_TYPE_DOMAIN mask without RCU
 * protection, so per housekeeping_dereference_check() the caller is
 * presumably expected to hold the cpuset lock or CPU-hotplug write lock —
 * verify against callers.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL if the
 * resulting housekeeping set would contain no online CPU.
 */
int housekeeping_update(struct cpumask *isol_mask)
{
	struct cpumask *trial, *old = NULL;
	int err;

	trial = kmalloc(cpumask_size(), GFP_KERNEL);
	if (!trial)
		return -ENOMEM;

	cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask);
	/* We need at least one online CPU left to run housekeeping work. */
	if (!cpumask_intersects(trial, cpu_online_mask)) {
		kfree(trial);
		return -EINVAL;
	}

	if (!housekeeping.flags)
		static_branch_enable(&housekeeping_overridden);

	if (housekeeping.flags & HK_FLAG_DOMAIN)
		old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
	else
		WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN);
	rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial);

	/* Wait for all lockless readers before freeing the old mask below. */
	synchronize_rcu();

	pci_probe_flush_workqueue();
	mem_cgroup_flush_workqueue();
	vmstat_flush_workqueue();

	err = workqueue_unbound_housekeeping_update(housekeeping_cpumask(HK_TYPE_DOMAIN));
	WARN_ON_ONCE(err < 0);

	err = tmigr_isolated_exclude_cpumask(isol_mask);
	WARN_ON_ONCE(err < 0);

	err = kthreads_update_housekeeping();
	WARN_ON_ONCE(err < 0);

	kfree(old);

	return 0;
}

/*
 * Late boot init: enable the static key if any boot-time isolation was
 * requested, start the tick offload machinery if needed, and migrate the
 * boot-time memblock-allocated cpumasks to kmalloc'ed copies.
 */
void __init housekeeping_init(void)
{
	enum hk_type type;

	if (!housekeeping.flags)
		return;

	static_branch_enable(&housekeeping_overridden);

	if (housekeeping.flags & HK_FLAG_KERNEL_NOISE)
		sched_tick_offload_init();
	/*
	 * Realloc with a proper allocator so that any cpumask update
	 * can indifferently free the old version with kfree().
	 */
	for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) {
		struct cpumask *omask, *nmask = kmalloc(cpumask_size(), GFP_KERNEL);

		if (WARN_ON_ONCE(!nmask))
			return;

		omask = rcu_dereference(housekeeping.cpumasks[type]);

		/* We need at least one CPU to handle housekeeping work */
		WARN_ON_ONCE(cpumask_empty(omask));
		cpumask_copy(nmask, omask);
		RCU_INIT_POINTER(housekeeping.cpumasks[type], nmask);
		memblock_free(omask, cpumask_size());
	}
}

/*
 * Publish the staging mask as the boot-time housekeeping mask for @type.
 * Uses memblock since this runs from early_param, before kmalloc is up;
 * housekeeping_init() later converts these to kmalloc'ed masks.
 */
static void __init housekeeping_setup_type(enum hk_type type,
					   cpumask_var_t housekeeping_staging)
{
	struct cpumask *mask = memblock_alloc_or_panic(cpumask_size(), SMP_CACHE_BYTES);

	cpumask_copy(mask, housekeeping_staging);
	RCU_INIT_POINTER(housekeeping.cpumasks[type], mask);
}

/*
 * Common parser for "nohz_full=" and "isolcpus=": @str is the CPU list to
 * isolate, @flags the HK_FLAG_* set the caller wants applied. Handles the
 * second-call case where both parameters appear on the command line and
 * must agree on the CPU set for types they share.
 *
 * Returns 1 on success (__setup convention), 0 on rejected input.
 */
static int __init housekeeping_setup(char *str, unsigned long flags)
{
	cpumask_var_t non_housekeeping_mask, housekeeping_staging;
	unsigned int first_cpu;
	int err = 0;

	if ((flags & HK_FLAG_KERNEL_NOISE) && !(housekeeping.flags & HK_FLAG_KERNEL_NOISE)) {
		if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) {
			pr_warn("Housekeeping: nohz unsupported."
				" Build with CONFIG_NO_HZ_FULL\n");
			return 0;
		}
	}

	alloc_bootmem_cpumask_var(&non_housekeeping_mask);
	if (cpulist_parse(str, non_housekeeping_mask) < 0) {
		pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
		goto free_non_housekeeping_mask;
	}

	alloc_bootmem_cpumask_var(&housekeeping_staging);
	/* Housekeeping CPUs = possible CPUs minus the isolated ones. */
	cpumask_andnot(housekeeping_staging,
		       cpu_possible_mask, non_housekeeping_mask);

	first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging);
	if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) {
		/* No present housekeeping CPU: force the boot CPU back in. */
		__cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
		__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
		if (!housekeeping.flags) {
			pr_warn("Housekeeping: must include one present CPU, "
				"using boot CPU:%d\n", smp_processor_id());
		}
	}

	/* Nothing isolated: leave housekeeping disabled for these types. */
	if (cpumask_empty(non_housekeeping_mask))
		goto free_housekeeping_staging;

	if (!housekeeping.flags) {
		/* First setup call ("nohz_full=" or "isolcpus=") */
		enum hk_type type;

		for_each_set_bit(type, &flags, HK_TYPE_MAX)
			housekeeping_setup_type(type, housekeeping_staging);
	} else {
		/* Second setup call ("nohz_full=" after "isolcpus=" or the reverse) */
		enum hk_type type;
		unsigned long iter_flags = flags & housekeeping.flags;

		/* Types set by both parameters must use the same CPU list. */
		for_each_set_bit(type, &iter_flags, HK_TYPE_MAX) {
			if (!cpumask_equal(housekeeping_staging,
					   housekeeping_cpumask(type))) {
				pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
				goto free_housekeeping_staging;
			}
		}

		/*
		 * Check the combination of nohz_full and isolcpus=domain,
		 * necessary to avoid problems with the timer migration
		 * hierarchy. managed_irq is ignored by this check since it
		 * isn't considered in the timer migration logic.
		 */
		iter_flags = housekeeping.flags & (HK_FLAG_KERNEL_NOISE | HK_FLAG_DOMAIN);
		type = find_first_bit(&iter_flags, HK_TYPE_MAX);
		/*
		 * Pass the check if none of these flags were previously set or
		 * are not in the current selection.
		 */
		iter_flags = flags & (HK_FLAG_KERNEL_NOISE | HK_FLAG_DOMAIN);
		first_cpu = (type == HK_TYPE_MAX || !iter_flags) ? 0 :
			cpumask_first_and_and(cpu_present_mask,
					      housekeeping_staging, housekeeping_cpumask(type));
		if (first_cpu >= min(nr_cpu_ids, setup_max_cpus)) {
			pr_warn("Housekeeping: must include one present CPU "
				"neither in nohz_full= nor in isolcpus=domain, "
				"ignoring setting %s\n", str);
			goto free_housekeeping_staging;
		}

		/* Install masks only for the types not already set up. */
		iter_flags = flags & ~housekeeping.flags;

		for_each_set_bit(type, &iter_flags, HK_TYPE_MAX)
			housekeeping_setup_type(type, housekeeping_staging);
	}

	if ((flags & HK_FLAG_KERNEL_NOISE) && !(housekeeping.flags & HK_FLAG_KERNEL_NOISE))
		tick_nohz_full_setup(non_housekeeping_mask);

	housekeeping.flags |= flags;
	err = 1;

free_housekeeping_staging:
	free_bootmem_cpumask_var(housekeeping_staging);
free_non_housekeeping_mask:
	free_bootmem_cpumask_var(non_housekeeping_mask);

	return err;
}

/* "nohz_full=<cpulist>": isolate the listed CPUs from kernel noise. */
static int __init housekeeping_nohz_full_setup(char *str)
{
	unsigned long flags;

	flags = HK_FLAG_KERNEL_NOISE;

	return housekeeping_setup(str, flags);
}
__setup("nohz_full=", housekeeping_nohz_full_setup);

/*
 * "isolcpus=[flag,...]<cpulist>": parse the optional comma-separated flag
 * prefix into HK_FLAG_* bits, then hand the remaining CPU list to
 * housekeeping_setup(). Unknown alphabetic flags are skipped with a
 * message; flags containing invalid characters reject the whole parameter.
 */
static int __init housekeeping_isolcpus_setup(char *str)
{
	unsigned long flags = 0;
	bool illegal = false;
	char *par;
	int len;

	while (isalpha(*str)) {
		/*
		 * isolcpus=nohz is equivalent to nohz_full.
		 */
		if (!strncmp(str, "nohz,", 5)) {
			str += 5;
			flags |= HK_FLAG_KERNEL_NOISE;
			continue;
		}

		if (!strncmp(str, "domain,", 7)) {
			str += 7;
			flags |= HK_FLAG_DOMAIN | HK_FLAG_DOMAIN_BOOT;
			continue;
		}

		if (!strncmp(str, "managed_irq,", 12)) {
			str += 12;
			flags |= HK_FLAG_MANAGED_IRQ;
			continue;
		}

		/*
		 * Skip unknown sub-parameter and validate that it is not
		 * containing an invalid character.
		 */
		for (par = str, len = 0; *str && *str != ','; str++, len++) {
			if (!isalpha(*str) && *str != '_')
				illegal = true;
		}

		if (illegal) {
			pr_warn("isolcpus: Invalid flag %.*s\n", len, par);
			return 0;
		}

		pr_info("isolcpus: Skipped unknown flag %.*s\n", len, par);
		str++;
	}

	/* Default behaviour for isolcpus without flags */
	if (!flags)
		flags |= HK_FLAG_DOMAIN | HK_FLAG_DOMAIN_BOOT;

	return housekeeping_setup(str, flags);
}
__setup("isolcpus=", housekeeping_isolcpus_setup);