1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Housekeeping management. Manage the targets for routine code that can run on 4 * any CPU: unbound workqueues, timers, kthreads and any offloadable work. 5 * 6 * Copyright (C) 2017 Red Hat, Inc., Frederic Weisbecker 7 * Copyright (C) 2017-2018 SUSE, Frederic Weisbecker 8 * 9 */ 10 #include <linux/sched/isolation.h> 11 #include <linux/pci.h> 12 #include "sched.h" 13 14 enum hk_flags { 15 HK_FLAG_DOMAIN_BOOT = BIT(HK_TYPE_DOMAIN_BOOT), 16 HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN), 17 HK_FLAG_MANAGED_IRQ = BIT(HK_TYPE_MANAGED_IRQ), 18 HK_FLAG_KERNEL_NOISE = BIT(HK_TYPE_KERNEL_NOISE), 19 }; 20 21 DEFINE_STATIC_KEY_FALSE(housekeeping_overridden); 22 EXPORT_SYMBOL_GPL(housekeeping_overridden); 23 24 struct housekeeping { 25 struct cpumask __rcu *cpumasks[HK_TYPE_MAX]; 26 unsigned long flags; 27 }; 28 29 static struct housekeeping housekeeping; 30 31 bool housekeeping_enabled(enum hk_type type) 32 { 33 return !!(READ_ONCE(housekeeping.flags) & BIT(type)); 34 } 35 EXPORT_SYMBOL_GPL(housekeeping_enabled); 36 37 static bool housekeeping_dereference_check(enum hk_type type) 38 { 39 if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) { 40 /* Cpuset isn't even writable yet? */ 41 if (system_state <= SYSTEM_SCHEDULING) 42 return true; 43 44 /* CPU hotplug write locked, so cpuset partition can't be overwritten */ 45 if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held()) 46 return true; 47 48 /* Cpuset lock held, partitions not writable */ 49 if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held()) 50 return true; 51 52 return false; 53 } 54 55 return true; 56 } 57 58 static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type) 59 { 60 return rcu_dereference_all_check(housekeeping.cpumasks[type], 61 housekeeping_dereference_check(type)); 62 } 63 64 const struct cpumask *housekeeping_cpumask(enum hk_type type) 65 { 66 const struct cpumask *mask = NULL; 67 68 if (static_branch_unlikely(&housekeeping_overridden)) { 69 if (READ_ONCE(housekeeping.flags) & BIT(type)) 70 mask = housekeeping_cpumask_dereference(type); 71 } 72 if (!mask) 73 mask = cpu_possible_mask; 74 return mask; 75 } 76 EXPORT_SYMBOL_GPL(housekeeping_cpumask); 77 78 int housekeeping_any_cpu(enum hk_type type) 79 { 80 int cpu; 81 82 if (static_branch_unlikely(&housekeeping_overridden)) { 83 if (housekeeping.flags & BIT(type)) { 84 cpu = sched_numa_find_closest(housekeeping_cpumask(type), smp_processor_id()); 85 if (cpu < nr_cpu_ids) 86 return cpu; 87 88 cpu = cpumask_any_and_distribute(housekeeping_cpumask(type), cpu_online_mask); 89 if (likely(cpu < nr_cpu_ids)) 90 return cpu; 91 /* 92 * Unless we have another problem this can only happen 93 * at boot time before start_secondary() brings the 1st 94 * housekeeping CPU up. 95 */ 96 WARN_ON_ONCE(system_state == SYSTEM_RUNNING || 97 type != HK_TYPE_TIMER); 98 } 99 } 100 return smp_processor_id(); 101 } 102 EXPORT_SYMBOL_GPL(housekeeping_any_cpu); 103 104 void housekeeping_affine(struct task_struct *t, enum hk_type type) 105 { 106 if (static_branch_unlikely(&housekeeping_overridden)) 107 if (housekeeping.flags & BIT(type)) 108 set_cpus_allowed_ptr(t, housekeeping_cpumask(type)); 109 } 110 EXPORT_SYMBOL_GPL(housekeeping_affine); 111 112 bool housekeeping_test_cpu(int cpu, enum hk_type type) 113 { 114 if (static_branch_unlikely(&housekeeping_overridden) && 115 READ_ONCE(housekeeping.flags) & BIT(type)) 116 return cpumask_test_cpu(cpu, housekeeping_cpumask(type)); 117 return true; 118 } 119 EXPORT_SYMBOL_GPL(housekeeping_test_cpu); 120 121 int housekeeping_update(struct cpumask *isol_mask) 122 { 123 struct cpumask *trial, *old = NULL; 124 int err; 125 126 lockdep_assert_cpus_held(); 127 128 trial = kmalloc(cpumask_size(), GFP_KERNEL); 129 if (!trial) 130 return -ENOMEM; 131 132 cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask); 133 if (!cpumask_intersects(trial, cpu_online_mask)) { 134 kfree(trial); 135 return -EINVAL; 136 } 137 138 if (!housekeeping.flags) 139 static_branch_enable_cpuslocked(&housekeeping_overridden); 140 141 if (housekeeping.flags & HK_FLAG_DOMAIN) 142 old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN); 143 else 144 WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN); 145 rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial); 146 147 synchronize_rcu(); 148 149 pci_probe_flush_workqueue(); 150 mem_cgroup_flush_workqueue(); 151 vmstat_flush_workqueue(); 152 153 err = workqueue_unbound_housekeeping_update(housekeeping_cpumask(HK_TYPE_DOMAIN)); 154 WARN_ON_ONCE(err < 0); 155 156 err = tmigr_isolated_exclude_cpumask(isol_mask); 157 WARN_ON_ONCE(err < 0); 158 159 err = kthreads_update_housekeeping(); 160 WARN_ON_ONCE(err < 0); 161 162 kfree(old); 163 164 return 0; 165 } 166 167 void __init housekeeping_init(void) 168 { 169 enum hk_type type; 170 171 if (!housekeeping.flags) 172 return; 173 174 static_branch_enable(&housekeeping_overridden); 175 176 if (housekeeping.flags & HK_FLAG_KERNEL_NOISE) 177 sched_tick_offload_init(); 178 /* 179 * Realloc with a proper allocator so that any cpumask update 180 * can indifferently free the old version with kfree(). 181 */ 182 for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) { 183 struct cpumask *omask, *nmask = kmalloc(cpumask_size(), GFP_KERNEL); 184 185 if (WARN_ON_ONCE(!nmask)) 186 return; 187 188 omask = rcu_dereference(housekeeping.cpumasks[type]); 189 190 /* We need at least one CPU to handle housekeeping work */ 191 WARN_ON_ONCE(cpumask_empty(omask)); 192 cpumask_copy(nmask, omask); 193 RCU_INIT_POINTER(housekeeping.cpumasks[type], nmask); 194 memblock_free(omask, cpumask_size()); 195 } 196 } 197 198 static void __init housekeeping_setup_type(enum hk_type type, 199 cpumask_var_t housekeeping_staging) 200 { 201 struct cpumask *mask = memblock_alloc_or_panic(cpumask_size(), SMP_CACHE_BYTES); 202 203 cpumask_copy(mask, housekeeping_staging); 204 RCU_INIT_POINTER(housekeeping.cpumasks[type], mask); 205 } 206 207 static int __init housekeeping_setup(char *str, unsigned long flags) 208 { 209 cpumask_var_t non_housekeeping_mask, housekeeping_staging; 210 unsigned int first_cpu; 211 int err = 0; 212 213 if ((flags & HK_FLAG_KERNEL_NOISE) && !(housekeeping.flags & HK_FLAG_KERNEL_NOISE)) { 214 if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) { 215 pr_warn("Housekeeping: nohz unsupported." 216 " Build with CONFIG_NO_HZ_FULL\n"); 217 return 0; 218 } 219 } 220 221 alloc_bootmem_cpumask_var(&non_housekeeping_mask); 222 if (cpulist_parse(str, non_housekeeping_mask) < 0) { 223 pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n"); 224 goto free_non_housekeeping_mask; 225 } 226 227 alloc_bootmem_cpumask_var(&housekeeping_staging); 228 cpumask_andnot(housekeeping_staging, 229 cpu_possible_mask, non_housekeeping_mask); 230 231 first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging); 232 if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) { 233 __cpumask_set_cpu(smp_processor_id(), housekeeping_staging); 234 __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); 235 if (!housekeeping.flags) { 236 pr_warn("Housekeeping: must include one present CPU, " 237 "using boot CPU:%d\n", smp_processor_id()); 238 } 239 } 240 241 if (cpumask_empty(non_housekeeping_mask)) 242 goto free_housekeeping_staging; 243 244 if (!housekeeping.flags) { 245 /* First setup call ("nohz_full=" or "isolcpus=") */ 246 enum hk_type type; 247 248 for_each_set_bit(type, &flags, HK_TYPE_MAX) 249 housekeeping_setup_type(type, housekeeping_staging); 250 } else { 251 /* Second setup call ("nohz_full=" after "isolcpus=" or the reverse) */ 252 enum hk_type type; 253 unsigned long iter_flags = flags & housekeeping.flags; 254 255 for_each_set_bit(type, &iter_flags, HK_TYPE_MAX) { 256 if (!cpumask_equal(housekeeping_staging, 257 housekeeping_cpumask(type))) { 258 pr_warn("Housekeeping: nohz_full= must match isolcpus=\n"); 259 goto free_housekeeping_staging; 260 } 261 } 262 263 /* 264 * Check the combination of nohz_full and isolcpus=domain, 265 * necessary to avoid problems with the timer migration 266 * hierarchy. managed_irq is ignored by this check since it 267 * isn't considered in the timer migration logic. 268 */ 269 iter_flags = housekeeping.flags & (HK_FLAG_KERNEL_NOISE | HK_FLAG_DOMAIN); 270 type = find_first_bit(&iter_flags, HK_TYPE_MAX); 271 /* 272 * Pass the check if none of these flags were previously set or 273 * are not in the current selection. 274 */ 275 iter_flags = flags & (HK_FLAG_KERNEL_NOISE | HK_FLAG_DOMAIN); 276 first_cpu = (type == HK_TYPE_MAX || !iter_flags) ? 0 : 277 cpumask_first_and_and(cpu_present_mask, 278 housekeeping_staging, housekeeping_cpumask(type)); 279 if (first_cpu >= min(nr_cpu_ids, setup_max_cpus)) { 280 pr_warn("Housekeeping: must include one present CPU " 281 "neither in nohz_full= nor in isolcpus=domain, " 282 "ignoring setting %s\n", str); 283 goto free_housekeeping_staging; 284 } 285 286 iter_flags = flags & ~housekeeping.flags; 287 288 for_each_set_bit(type, &iter_flags, HK_TYPE_MAX) 289 housekeeping_setup_type(type, housekeeping_staging); 290 } 291 292 if ((flags & HK_FLAG_KERNEL_NOISE) && !(housekeeping.flags & HK_FLAG_KERNEL_NOISE)) 293 tick_nohz_full_setup(non_housekeeping_mask); 294 295 housekeeping.flags |= flags; 296 err = 1; 297 298 free_housekeeping_staging: 299 free_bootmem_cpumask_var(housekeeping_staging); 300 free_non_housekeeping_mask: 301 free_bootmem_cpumask_var(non_housekeeping_mask); 302 303 return err; 304 } 305 306 static int __init housekeeping_nohz_full_setup(char *str) 307 { 308 unsigned long flags; 309 310 flags = HK_FLAG_KERNEL_NOISE; 311 312 return housekeeping_setup(str, flags); 313 } 314 __setup("nohz_full=", housekeeping_nohz_full_setup); 315 316 static int __init housekeeping_isolcpus_setup(char *str) 317 { 318 unsigned long flags = 0; 319 bool illegal = false; 320 char *par; 321 int len; 322 323 while (isalpha(*str)) { 324 /* 325 * isolcpus=nohz is equivalent to nohz_full. 326 */ 327 if (!strncmp(str, "nohz,", 5)) { 328 str += 5; 329 flags |= HK_FLAG_KERNEL_NOISE; 330 continue; 331 } 332 333 if (!strncmp(str, "domain,", 7)) { 334 str += 7; 335 flags |= HK_FLAG_DOMAIN | HK_FLAG_DOMAIN_BOOT; 336 continue; 337 } 338 339 if (!strncmp(str, "managed_irq,", 12)) { 340 str += 12; 341 flags |= HK_FLAG_MANAGED_IRQ; 342 continue; 343 } 344 345 /* 346 * Skip unknown sub-parameter and validate that it is not 347 * containing an invalid character. 348 */ 349 for (par = str, len = 0; *str && *str != ','; str++, len++) { 350 if (!isalpha(*str) && *str != '_') 351 illegal = true; 352 } 353 354 if (illegal) { 355 pr_warn("isolcpus: Invalid flag %.*s\n", len, par); 356 return 0; 357 } 358 359 pr_info("isolcpus: Skipped unknown flag %.*s\n", len, par); 360 str++; 361 } 362 363 /* Default behaviour for isolcpus without flags */ 364 if (!flags) 365 flags |= HK_FLAG_DOMAIN | HK_FLAG_DOMAIN_BOOT; 366 367 return housekeeping_setup(str, flags); 368 } 369 __setup("isolcpus=", housekeeping_isolcpus_setup); 370