topology.c - OpenGrok cross reference for /linux/kernel/sched/topology.c

Lines Matching +full:child +full:- +full:node
1 // SPDX-License-Identifier: GPL-2.0
46 	struct sched_group *group = sd->groups;  in sched_domain_debug_one()
47 	unsigned long flags = sd->flags;  in sched_domain_debug_one()
52 	printk(KERN_DEBUG "%*s domain-%d: ", level, "", level);  in sched_domain_debug_one()
54 	       cpumask_pr_args(sched_domain_span(sd)), sd->name);  in sched_domain_debug_one()
57 		printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);  in sched_domain_debug_one()
60 		printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);  in sched_domain_debug_one()
67 		if ((meta_flags & SDF_SHARED_CHILD) && sd->child &&  in sched_domain_debug_one()
68 		    !(sd->child->flags & flag))  in sched_domain_debug_one()
69 			printk(KERN_ERR "ERROR: flag %s set here but not in child\n",  in sched_domain_debug_one()
72 		if ((meta_flags & SDF_SHARED_PARENT) && sd->parent &&  in sched_domain_debug_one()
73 		    !(sd->parent->flags & flag))  in sched_domain_debug_one()
92 		if (!(sd->flags & SD_NUMA) &&  in sched_domain_debug_one()
102 				group->sgc->id,  in sched_domain_debug_one()
105 		if ((sd->flags & SD_NUMA) &&  in sched_domain_debug_one()
111 		if (group->sgc->capacity != SCHED_CAPACITY_SCALE)  in sched_domain_debug_one()
112 			printk(KERN_CONT " cap=%lu", group->sgc->capacity);  in sched_domain_debug_one()
114 		if (group == sd->groups && sd->child &&  in sched_domain_debug_one()
115 		    !cpumask_equal(sched_domain_span(sd->child),  in sched_domain_debug_one()
117 			printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n");  in sched_domain_debug_one()
122 		group = group->next;  in sched_domain_debug_one()
124 		if (group != sd->groups)  in sched_domain_debug_one()
127 	} while (group != sd->groups);  in sched_domain_debug_one()
131 		printk(KERN_ERR "ERROR: groups don't span domain->span\n");  in sched_domain_debug_one()
133 	if (sd->parent &&  in sched_domain_debug_one()
134 	    !cpumask_subset(groupmask, sched_domain_span(sd->parent)))  in sched_domain_debug_one()
135 		printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");  in sched_domain_debug_one()
147 		printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);  in sched_domain_debug()
151 	printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu);  in sched_domain_debug()
157 		sd = sd->parent;  in sched_domain_debug()
176 	if ((sd->flags & SD_DEGENERATE_GROUPS_MASK) &&  in sd_degenerate()
177 	    (sd->groups != sd->groups->next))  in sd_degenerate()
181 	if (sd->flags & (SD_WAKE_AFFINE))  in sd_degenerate()
190 	unsigned long cflags = sd->flags, pflags = parent->flags;  in sd_parent_degenerate()
199 	if (parent->groups == parent->groups->next)  in sd_parent_degenerate()
245 			pr_info("rd %*pbl: Checking EAS: frequency-invariant load tracking not yet supported",  in sched_is_eas_possible()
278 		return -EPERM;  in sched_energy_aware_handler()
282 			return -EOPNOTSUPP;  in sched_energy_aware_handler()
325 		tmp = pd->next;  in free_pd()
336 		pd = pd->next;  in find_pd()
356 	pd->em_pd = obj;  in pd_init()
373 				em_pd_nr_perf_states(pd->em_pd));  in perf_domain_debug()
374 		pd = pd->next;  in perf_domain_debug()
414 	struct root_domain *rd = cpu_rq(cpu)->rd;  in build_perf_domains()
431 		tmp->next = pd;  in build_perf_domains()
438 	tmp = rd->pd;  in build_perf_domains()
439 	rcu_assign_pointer(rd->pd, pd);  in build_perf_domains()
441 		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);  in build_perf_domains()
447 	tmp = rd->pd;  in build_perf_domains()
448 	rcu_assign_pointer(rd->pd, NULL);  in build_perf_domains()
450 		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);  in build_perf_domains()
462 	cpupri_cleanup(&rd->cpupri);  in free_rootdomain()
463 	cpudl_cleanup(&rd->cpudl);  in free_rootdomain()
464 	free_cpumask_var(rd->dlo_mask);  in free_rootdomain()
465 	free_cpumask_var(rd->rto_mask);  in free_rootdomain()
466 	free_cpumask_var(rd->online);  in free_rootdomain()
467 	free_cpumask_var(rd->span);  in free_rootdomain()
468 	free_pd(rd->pd);  in free_rootdomain()
479 	if (rq->rd) {  in rq_attach_root()
480 		old_rd = rq->rd;  in rq_attach_root()
482 		if (cpumask_test_cpu(rq->cpu, old_rd->online))  in rq_attach_root()
485 		cpumask_clear_cpu(rq->cpu, old_rd->span);  in rq_attach_root()
492 		if (!atomic_dec_and_test(&old_rd->refcount))  in rq_attach_root()
496 	atomic_inc(&rd->refcount);  in rq_attach_root()
497 	rq->rd = rd;  in rq_attach_root()
499 	cpumask_set_cpu(rq->cpu, rd->span);  in rq_attach_root()
500 	if (cpumask_test_cpu(rq->cpu, cpu_active_mask))  in rq_attach_root()
508 	if (rq->fair_server.dl_server)  in rq_attach_root()
509 		__dl_server_attach_root(&rq->fair_server, rq);  in rq_attach_root()
514 		call_rcu(&old_rd->rcu, free_rootdomain);  in rq_attach_root()
519 	atomic_inc(&rd->refcount);  in sched_get_rd()
524 	if (!atomic_dec_and_test(&rd->refcount))  in sched_put_rd()
527 	call_rcu(&rd->rcu, free_rootdomain);  in sched_put_rd()
532 	if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))  in init_rootdomain()
534 	if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))  in init_rootdomain()
536 	if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))  in init_rootdomain()
538 	if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))  in init_rootdomain()
542 	rd->rto_cpu = -1;  in init_rootdomain()
543 	raw_spin_lock_init(&rd->rto_lock);  in init_rootdomain()
544 	rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func);  in init_rootdomain()
547 	rd->visit_cookie = 0;  in init_rootdomain()
548 	init_dl_bw(&rd->dl_bw);  in init_rootdomain()
549 	if (cpudl_init(&rd->cpudl) != 0)  in init_rootdomain()
552 	if (cpupri_init(&rd->cpupri) != 0)  in init_rootdomain()
557 	cpudl_cleanup(&rd->cpudl);  in init_rootdomain()
559 	free_cpumask_var(rd->rto_mask);  in init_rootdomain()
561 	free_cpumask_var(rd->dlo_mask);  in init_rootdomain()
563 	free_cpumask_var(rd->online);  in init_rootdomain()
565 	free_cpumask_var(rd->span);  in init_rootdomain()
567 	return -ENOMEM;  in init_rootdomain()
571  * By default the system creates a single root-domain with all CPUs as
608 		tmp = sg->next;  in free_sched_groups()
610 		if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))  in free_sched_groups()
611 			kfree(sg->sgc);  in free_sched_groups()
613 		if (atomic_dec_and_test(&sg->ref))  in free_sched_groups()
626 	free_sched_groups(sd->groups, 1);  in destroy_sched_domain()
628 	if (sd->shared && atomic_dec_and_test(&sd->shared->ref))  in destroy_sched_domain()
629 		kfree(sd->shared);  in destroy_sched_domain()
638 		struct sched_domain *parent = sd->parent;  in destroy_sched_domains_rcu()
647 		call_rcu(&sd->rcu, destroy_sched_domains_rcu);  in destroy_sched_domains()
682 		sds = sd->shared;  in update_top_cache_domain()
697 	 * but equals to LLC id on non-Cluster machines.  in update_top_cache_domain()
723 		struct sched_domain *parent = tmp->parent;  in cpu_attach_domain()
728 			tmp->parent = parent->parent;  in cpu_attach_domain()
730 			if (parent->parent) {  in cpu_attach_domain()
731 				parent->parent->child = tmp;  in cpu_attach_domain()
732 				parent->parent->groups->flags = tmp->flags;  in cpu_attach_domain()
740 			if (parent->flags & SD_PREFER_SIBLING)  in cpu_attach_domain()
741 				tmp->flags |= SD_PREFER_SIBLING;  in cpu_attach_domain()
744 			tmp = tmp->parent;  in cpu_attach_domain()
749 		sd = sd->parent;  in cpu_attach_domain()
752 			struct sched_group *sg = sd->groups;  in cpu_attach_domain()
755 			 * sched groups hold the flags of the child sched  in cpu_attach_domain()
757 			 * the child is being destroyed.  in cpu_attach_domain()
760 				sg->flags = 0;  in cpu_attach_domain()
761 			} while (sg != sd->groups);  in cpu_attach_domain()
763 			sd->child = NULL;  in cpu_attach_domain()
770 	tmp = rq->sd;  in cpu_attach_domain()
771 	rcu_assign_pointer(rq->sd, sd);  in cpu_attach_domain()
808  * Given a node-distance table, for example:
810  *   node   0   1   2   3
816  * which represents a 4 node ring topology like:
818  *   0 ----- 1
822  *   3 ----- 2
830  * NUMA-2	0-3		0-3		0-3		0-3
831  *  groups:	{0-1,3},{1-3}	{0-2},{0,2-3}	{1-3},{0-1,3}	{0,2-3},{0-2}
833  * NUMA-1	0-1,3		0-2		1-3		0,2-3
836  * NUMA-0	0		1		2		3
840  * When we iterate a domain in child domain chunks some nodes can be
841  * represented multiple times -- hence the "overlap" naming for this part of
845  * domain. For instance Node-0 NUMA-2 would only get groups: 0-1,3 and 1-3.
849  *  - the first group of each domain is its child domain; this
850  *    gets us the first 0-1,3
851  *  - the only uncovered node is 2, who's child domain is 1-3.
854  * more complicated. Consider for instance the groups of NODE-1 NUMA-2, both
855  * groups include the CPUs of Node-0, while those CPUs would not in fact ever
856  * end up at those groups (they would end up in group: 0-1,3).
860  * (child) domain tree.
871  *   node   0   1   2   3
879  *   0 ----- 1
883  *   2 ----- 3
891  * NUMA-2	0-3						0-3
892  *  groups:	{0-2},{1-3}					{1-3},{0-2}
894  * NUMA-1	0-2		0-3		0-3		1-3
896  * NUMA-0	0		1		2		3
906  * isn't complete yet, however since each group represents a (child) domain we
914 	struct sd_data *sdd = sd->private;  in build_balance_mask()
921 		sibling = *per_cpu_ptr(sdd->sd, i);  in build_balance_mask()
928 		if (!sibling->child)  in build_balance_mask()
932 		if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))  in build_balance_mask()
943  * XXX: This creates per-node group entries; since the load-balancer will
944  * immediately access remote memory to construct this group's load-balance
945  * statistics having the groups node local is of dubious benefit.
960 	if (sd->child) {  in build_group_from_child_sched_domain()
961 		cpumask_copy(sg_span, sched_domain_span(sd->child));  in build_group_from_child_sched_domain()
962 		sg->flags = sd->child->flags;  in build_group_from_child_sched_domain()
967 	atomic_inc(&sg->ref);  in build_group_from_child_sched_domain()
975 	struct sd_data *sdd = sd->private;  in init_overlap_sched_group()
982 	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);  in init_overlap_sched_group()
983 	if (atomic_inc_return(&sg->sgc->ref) == 1)  in init_overlap_sched_group()
989 	 * Initialize sgc->capacity such that even if we mess up the  in init_overlap_sched_group()
994 	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);  in init_overlap_sched_group()
995 	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;  in init_overlap_sched_group()
996 	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;  in init_overlap_sched_group()
1003 	 * The proper descendant would be the one whose child won't span out  in find_descended_sibling()
1006 	while (sibling->child &&  in find_descended_sibling()
1007 	       !cpumask_subset(sched_domain_span(sibling->child),  in find_descended_sibling()
1009 		sibling = sibling->child;  in find_descended_sibling()
1016 	while (sibling->child &&  in find_descended_sibling()
1017 	       cpumask_equal(sched_domain_span(sibling->child),  in find_descended_sibling()
1019 		sibling = sibling->child;  in find_descended_sibling()
1030 	struct sd_data *sdd = sd->private;  in build_overlap_sched_groups()
1042 		sibling = *per_cpu_ptr(sdd->sd, i);  in build_overlap_sched_groups()
1045 		 * Asymmetric node setups can result in situations where the  in build_overlap_sched_groups()
1058 		 * Usually we build sched_group by sibling's child sched_domain  in build_overlap_sched_groups()
1060 		 * to build sched_group by sibling's proper descendant's child  in build_overlap_sched_groups()
1061 		 * domain because sibling's child sched_domain will span out of  in build_overlap_sched_groups()
1066 		 *   node   0   1   2   3  in build_overlap_sched_groups()
1072 		 *   0 --- 1 --- 2 --- 3  in build_overlap_sched_groups()
1074 		 * NUMA-3       0-3             N/A             N/A             0-3  in build_overlap_sched_groups()
1075 		 *  groups:     {0-2},{1-3}                                     {1-3},{0-2}  in build_overlap_sched_groups()
1077 		 * NUMA-2       0-2             0-3             0-3             1-3  in build_overlap_sched_groups()
1078 		 *  groups:     {0-1},{1-3}     {0-2},{2-3}     {1-3},{0-1}     {2-3},{0-2}  in build_overlap_sched_groups()
1080 		 * NUMA-1       0-1             0-2             1-3             2-3  in build_overlap_sched_groups()
1083 		 * NUMA-0       0               1               2               3  in build_overlap_sched_groups()
1085 		 * The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the  in build_overlap_sched_groups()
1088 		if (sibling->child &&  in build_overlap_sched_groups()
1089 		    !cpumask_subset(sched_domain_span(sibling->child), span))  in build_overlap_sched_groups()
1104 			last->next = sg;  in build_overlap_sched_groups()
1106 		last->next = first;  in build_overlap_sched_groups()
1108 	sd->groups = first;  in build_overlap_sched_groups()
1115 	return -ENOMEM;  in build_overlap_sched_groups()
1120  * Package topology (also see the load-balance blurb in fair.c)
1125  *  - Simultaneous multithreading (SMT)
1126  *  - Multi-Core Cache (MC)
1127  *  - Package (PKG)
1129  * Where the last one more or less denotes everything up to a NUMA node.
1133  *	sched_domain -> sched_group -> sched_group_capacity
1135  *          `-'             `-'
1137  * The sched_domains are per-CPU and have a two way link (parent & child) and
1153  *  - or -
1155  * PKG  0-7 0-7 0-7 0-7 0-7 0-7 0-7 0-7
1156  * MC	0-3 0-3 0-3 0-3 4-7 4-7 4-7 4-7
1157  * SMT  0-1 0-1 2-3 2-3 4-5 4-5 6-7 6-7
1162  * topology levels, while sched_group moves you sideways through it, at child
1170  *  - The first is the balance_cpu (see should_we_balance() and the
1171  *    load-balance blurb in fair.c); for each group we only want 1 CPU to
1174  *  - The second is the sched_group_capacity; we want all identical groups
1187  * [*] in other words, the first group of each domain is its child domain.
1192 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);  in get_group()
1193 	struct sched_domain *child = sd->child;  in get_group()  local
1197 	if (child)  in get_group()
1198 		cpu = cpumask_first(sched_domain_span(child));  in get_group()
1200 	sg = *per_cpu_ptr(sdd->sg, cpu);  in get_group()
1201 	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);  in get_group()
1204 	already_visited = atomic_inc_return(&sg->ref) > 1;  in get_group()
1206 	WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1));  in get_group()
1212 	if (child) {  in get_group()
1213 		cpumask_copy(sched_group_span(sg), sched_domain_span(child));  in get_group()
1215 		sg->flags = child->flags;  in get_group()
1221 	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));  in get_group()
1222 	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;  in get_group()
1223 	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;  in get_group()
1230  * covered by the given span, will set each group's ->cpumask correctly,
1231  * and will initialize their ->sgc.
1239 	struct sd_data *sdd = sd->private;  in build_sched_groups()
1262 			last->next = sg;  in build_sched_groups()
1265 	last->next = first;  in build_sched_groups()
1266 	sd->groups = first;  in build_sched_groups()
1283 	struct sched_group *sg = sd->groups;  in init_sched_groups_capacity()
1289 		int cpu, cores = 0, max_cpu = -1;  in init_sched_groups_capacity()
1291 		sg->group_weight = cpumask_weight(sched_group_span(sg));  in init_sched_groups_capacity()
1300 		sg->cores = cores;  in init_sched_groups_capacity()
1302 		if (!(sd->flags & SD_ASYM_PACKING))  in init_sched_groups_capacity()
1311 		sg->asym_prefer_cpu = max_cpu;  in init_sched_groups_capacity()
1314 		sg = sg->next;  in init_sched_groups_capacity()
1315 	} while (sg != sd->groups);  in init_sched_groups_capacity()
1335 		if (!(sd->flags & SD_ASYM_PACKING))  in sched_update_asym_prefer_cpu()
1340 		 * node and will require updating "asym_prefer_cpu" on  in sched_update_asym_prefer_cpu()
1344 		 * "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu"  in sched_update_asym_prefer_cpu()
1347 		WARN_ON_ONCE(sd->flags & SD_NUMA);  in sched_update_asym_prefer_cpu()
1349 		sg = sd->groups;  in sched_update_asym_prefer_cpu()
1350 		if (cpu != sg->asym_prefer_cpu) {  in sched_update_asym_prefer_cpu()
1357 			if (!sched_asym_prefer(cpu, sg->asym_prefer_cpu))  in sched_update_asym_prefer_cpu()
1360 			WRITE_ONCE(sg->asym_prefer_cpu, cpu);  in sched_update_asym_prefer_cpu()
1373 		WRITE_ONCE(sg->asym_prefer_cpu, asym_prefer_cpu);  in sched_update_asym_prefer_cpu()
1440 		if (capacity == entry->capacity)  in asym_cpu_capacity_update_data()
1442 		else if (!insert_entry && capacity > entry->capacity)  in asym_cpu_capacity_update_data()
1449 	entry->capacity = capacity;  in asym_cpu_capacity_update_data()
1453 		list_add_tail_rcu(&entry->link, &asym_cap_list);  in asym_cpu_capacity_update_data()
1455 		list_add_rcu(&entry->link, &insert_entry->link);  in asym_cpu_capacity_update_data()
1461  * Build-up/update list of CPUs grouped by their capacities
1478 			list_del_rcu(&entry->link);  in asym_cpu_capacity_scan()
1479 			call_rcu(&entry->rcu, free_asym_cap_entry);  in asym_cpu_capacity_scan()
1489 		list_del_rcu(&entry->link);  in asym_cpu_capacity_scan()
1490 		call_rcu(&entry->rcu, free_asym_cap_entry);  in asym_cpu_capacity_scan()
1496  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
1499 static int default_relax_domain_level = -1;
1516 	if (!attr || attr->relax_domain_level < 0) {  in set_domain_attribute()
1521 		request = attr->relax_domain_level;  in set_domain_attribute()
1523 	if (sd->level >= request) {  in set_domain_attribute()
1525 		sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);  in set_domain_attribute()
1537 		if (!atomic_read(&d->rd->refcount))  in __free_domain_allocs()
1538 			free_rootdomain(&d->rd->rcu);  in __free_domain_allocs()
1541 		free_percpu(d->sd);  in __free_domain_allocs()
1558 	d->sd = alloc_percpu(struct sched_domain *);  in __visit_domain_allocation_hell()
1559 	if (!d->sd)  in __visit_domain_allocation_hell()
1561 	d->rd = alloc_rootdomain();  in __visit_domain_allocation_hell()
1562 	if (!d->rd)  in __visit_domain_allocation_hell()
1575 	struct sd_data *sdd = sd->private;  in claim_allocations()
1577 	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);  in claim_allocations()
1578 	*per_cpu_ptr(sdd->sd, cpu) = NULL;  in claim_allocations()
1580 	if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))  in claim_allocations()
1581 		*per_cpu_ptr(sdd->sds, cpu) = NULL;  in claim_allocations()
1583 	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))  in claim_allocations()
1584 		*per_cpu_ptr(sdd->sg, cpu) = NULL;  in claim_allocations()
1586 	if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))  in claim_allocations()
1587 		*per_cpu_ptr(sdd->sgc, cpu) = NULL;  in claim_allocations()
1615  *   SD_ASYM_PACKING        - describes SMT quirks
1627 	struct sched_domain *child, int cpu)  in sd_init()  argument
1629 	struct sd_data *sdd = &tl->data;  in sd_init()
1630 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);  in sd_init()
1634 	sd_weight = cpumask_weight(tl->mask(tl, cpu));  in sd_init()
1636 	if (tl->sd_flags)  in sd_init()
1637 		sd_flags = (*tl->sd_flags)();  in sd_init()
1667 		.child			= child,  in sd_init()
1668 		.name			= tl->name,  in sd_init()
1672 	cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));  in sd_init()
1675 	sd->flags |= asym_cpu_capacity_classify(sd_span, cpu_map);  in sd_init()
1677 	WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==  in sd_init()
1685 	if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child)  in sd_init()
1686 		sd->child->flags &= ~SD_PREFER_SIBLING;  in sd_init()
1688 	if (sd->flags & SD_SHARE_CPUCAPACITY) {  in sd_init()
1689 		sd->imbalance_pct = 110;  in sd_init()
1691 	} else if (sd->flags & SD_SHARE_LLC) {  in sd_init()
1692 		sd->imbalance_pct = 117;  in sd_init()
1693 		sd->cache_nice_tries = 1;  in sd_init()
1696 	} else if (sd->flags & SD_NUMA) {  in sd_init()
1697 		sd->cache_nice_tries = 2;  in sd_init()
1699 		sd->flags &= ~SD_PREFER_SIBLING;  in sd_init()
1700 		sd->flags |= SD_SERIALIZE;  in sd_init()
1701 		if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) {  in sd_init()
1702 			sd->flags &= ~(SD_BALANCE_EXEC |  in sd_init()
1709 		sd->cache_nice_tries = 1;  in sd_init()
1716 	if (sd->flags & SD_SHARE_LLC) {  in sd_init()
1717 		sd->shared = *per_cpu_ptr(sdd->sds, sd_id);  in sd_init()
1718 		atomic_inc(&sd->shared->ref);  in sd_init()
1719 		atomic_set(&sd->shared->nr_busy_cpus, sd_weight);  in sd_init()
1722 	sd->private = sdd;  in sd_init()
1769  * Topology list, bottom-up.
1792 	for (tl = sched_domain_topology; tl->mask; tl++)
1811 	return sched_domains_numa_masks[tl->numa_level][cpu_to_node(cpu)];  in sd_numa_mask()
1882  * - If the maximum distance between any nodes is 1 hop, the system
1884  * - If for two nodes A and B, located N > 1 hops away from each other,
1885  *   there is an intermediary node C, which is < N hops away from both
1905 			/* Is there an intermediary node between a and b? */  in init_numa_topology_type()
1937 	 * O(nr_nodes^2) de-duplicating selection sort -- in order to find the  in sched_init_numa()
2001 	 * Now for each level, construct a mask per node which contains all  in sched_init_numa()
2020 					sched_numa_warn("Node-distance not symmetric");  in sched_init_numa()
2046 	 * Add the NUMA identity distance, aka single NODE.  in sched_init_numa()
2048 	tl[i++] = SDTL_INIT(sd_numa_mask, NULL, NODE);  in sched_init_numa()
2062 	WRITE_ONCE(sched_max_numa_distance, sched_domains_numa_distance[nr_levels - 1]);  in sched_init_numa()
2107 	int node;  in sched_update_numa()  local
2109 	node = cpu_to_node(cpu);  in sched_update_numa()
2112 	 * node is onlined or the last CPU of a node is offlined.  in sched_update_numa()
2114 	if (cpumask_weight(cpumask_of_node(node)) != 1)  in sched_update_numa()
2118 	sched_init_numa(online ? NUMA_NO_NODE : node);  in sched_update_numa()
2123 	int node = cpu_to_node(cpu);  in sched_domains_numa_masks_set()  local
2131 			/* Set ourselves in the remote node's masks */  in sched_domains_numa_masks_set()
2132 			if (node_distance(j, node) <= sched_domains_numa_distance[i])  in sched_domains_numa_masks_set()
2151  * sched_numa_find_closest() - given the NUMA topology, find the cpu
2185 	int node;  member
2195 	if (cpumask_weight_and(k->cpus, cur_hop[k->node]) <= k->cpu)  in hop_cmp()
2198 	if (b == k->masks) {  in hop_cmp()
2199 		k->w = 0;  in hop_cmp()
2203 	prev_hop = *((struct cpumask ***)b - 1);  in hop_cmp()
2204 	k->w = cpumask_weight_and(k->cpus, prev_hop[k->node]);  in hop_cmp()
2205 	if (k->w <= k->cpu)  in hop_cmp()
2208 	return -1;  in hop_cmp()
2212  * sched_numa_find_nth_cpu() - given the NUMA topology, find the Nth closest CPU
2214  *                             from a given @node.
2217  * @node: NUMA node to order CPUs by distance
2221 int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)  in sched_numa_find_nth_cpu()  argument
2227 	if (node == NUMA_NO_NODE)  in sched_numa_find_nth_cpu()
2232 	/* CPU-less node entries are uninitialized in sched_domains_numa_masks */  in sched_numa_find_nth_cpu()
2233 	node = numa_nearest_node(node, N_CPU);  in sched_numa_find_nth_cpu()
2234 	k.node = node;  in sched_numa_find_nth_cpu()
2243 	hop = hop_masks	- k.masks;  in sched_numa_find_nth_cpu()
2246 		cpumask_nth_and_andnot(cpu - k.w, cpus, k.masks[hop][node], k.masks[hop-1][node]) :  in sched_numa_find_nth_cpu()
2247 		cpumask_nth_and(cpu, cpus, k.masks[0][node]);  in sched_numa_find_nth_cpu()
2255  * sched_numa_hop_mask() - Get the cpumask of CPUs at most @hops hops away from
2256  *                         @node
2257  * @node: The node to count hops from.
2258  * @hops: Include CPUs up to that many hops away. 0 means local node.
2261  * @node, an error value otherwise.
2264  * read-side section, copy it if required beyond that.
2271 const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops)  in sched_numa_hop_mask()  argument
2275 	if (node >= nr_node_ids || hops >= sched_domains_numa_levels)  in sched_numa_hop_mask()
2276 		return ERR_PTR(-EINVAL);  in sched_numa_hop_mask()
2280 		return ERR_PTR(-EBUSY);  in sched_numa_hop_mask()
2282 	return masks[hops][node];  in sched_numa_hop_mask()
2294 		struct sd_data *sdd = &tl->data;  in __sdt_alloc()
2296 		sdd->sd = alloc_percpu(struct sched_domain *);  in __sdt_alloc()
2297 		if (!sdd->sd)  in __sdt_alloc()
2298 			return -ENOMEM;  in __sdt_alloc()
2300 		sdd->sds = alloc_percpu(struct sched_domain_shared *);  in __sdt_alloc()
2301 		if (!sdd->sds)  in __sdt_alloc()
2302 			return -ENOMEM;  in __sdt_alloc()
2304 		sdd->sg = alloc_percpu(struct sched_group *);  in __sdt_alloc()
2305 		if (!sdd->sg)  in __sdt_alloc()
2306 			return -ENOMEM;  in __sdt_alloc()
2308 		sdd->sgc = alloc_percpu(struct sched_group_capacity *);  in __sdt_alloc()
2309 		if (!sdd->sgc)  in __sdt_alloc()
2310 			return -ENOMEM;  in __sdt_alloc()
2321 				return -ENOMEM;  in __sdt_alloc()
2323 			*per_cpu_ptr(sdd->sd, j) = sd;  in __sdt_alloc()
2328 				return -ENOMEM;  in __sdt_alloc()
2330 			*per_cpu_ptr(sdd->sds, j) = sds;  in __sdt_alloc()
2335 				return -ENOMEM;  in __sdt_alloc()
2337 			sg->next = sg;  in __sdt_alloc()
2339 			*per_cpu_ptr(sdd->sg, j) = sg;  in __sdt_alloc()
2344 				return -ENOMEM;  in __sdt_alloc()
2346 			sgc->id = j;  in __sdt_alloc()
2348 			*per_cpu_ptr(sdd->sgc, j) = sgc;  in __sdt_alloc()
2361 		struct sd_data *sdd = &tl->data;  in __sdt_free()
2366 			if (sdd->sd) {  in __sdt_free()
2367 				sd = *per_cpu_ptr(sdd->sd, j);  in __sdt_free()
2368 				if (sd && (sd->flags & SD_NUMA))  in __sdt_free()
2369 					free_sched_groups(sd->groups, 0);  in __sdt_free()
2370 				kfree(*per_cpu_ptr(sdd->sd, j));  in __sdt_free()
2373 			if (sdd->sds)  in __sdt_free()
2374 				kfree(*per_cpu_ptr(sdd->sds, j));  in __sdt_free()
2375 			if (sdd->sg)  in __sdt_free()
2376 				kfree(*per_cpu_ptr(sdd->sg, j));  in __sdt_free()
2377 			if (sdd->sgc)  in __sdt_free()
2378 				kfree(*per_cpu_ptr(sdd->sgc, j));  in __sdt_free()
2380 		free_percpu(sdd->sd);  in __sdt_free()
2381 		sdd->sd = NULL;  in __sdt_free()
2382 		free_percpu(sdd->sds);  in __sdt_free()
2383 		sdd->sds = NULL;  in __sdt_free()
2384 		free_percpu(sdd->sg);  in __sdt_free()
2385 		sdd->sg = NULL;  in __sdt_free()
2386 		free_percpu(sdd->sgc);  in __sdt_free()
2387 		sdd->sgc = NULL;  in __sdt_free()
2393 		struct sched_domain *child, int cpu)  in build_sched_domain()  argument
2395 	struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);  in build_sched_domain()
2397 	if (child) {  in build_sched_domain()
2398 		sd->level = child->level + 1;  in build_sched_domain()
2399 		sched_domain_level_max = max(sched_domain_level_max, sd->level);  in build_sched_domain()
2400 		child->parent = sd;  in build_sched_domain()
2402 		if (!cpumask_subset(sched_domain_span(child),  in build_sched_domain()
2406 					child->name, sd->name);  in build_sched_domain()
2407 			/* Fixup, ensure @sd has at least @child CPUs. */  in build_sched_domain()
2410 				   sched_domain_span(child));  in build_sched_domain()
2421  * any two given CPUs on non-NUMA topology levels.
2436 		if (tl->sd_flags)  in topology_span_sane()
2437 			tl_common_flags = (*tl->sd_flags)();  in topology_span_sane()
2447 		 * Non-NUMA levels cannot partially overlap - they must be either  in topology_span_sane()
2449 		 * breaking the sched_group lists - i.e. a later get_group() pass  in topology_span_sane()
2453 			const struct cpumask *tl_cpu_mask = tl->mask(tl, cpu);  in topology_span_sane()
2461 				if (!cpumask_equal(tl->mask(tl, id), tl_cpu_mask))  in topology_span_sane()
2487 	int i, ret = -ENOMEM;  in build_sched_domains()
2507 			has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;  in build_sched_domains()
2521 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {  in build_sched_domains()
2522 			sd->span_weight = cpumask_weight(sched_domain_span(sd));  in build_sched_domains()
2523 			if (sd->flags & SD_NUMA) {  in build_sched_domains()
2541 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {  in build_sched_domains()
2542 			struct sched_domain *child = sd->child;  in build_sched_domains()  local
2544 			if (!(sd->flags & SD_SHARE_LLC) && child &&  in build_sched_domains()
2545 			    (child->flags & SD_SHARE_LLC)) {  in build_sched_domains()
2550 				 * For a single LLC per node, allow an  in build_sched_domains()
2551 				 * imbalance up to 12.5% of the node. This is  in build_sched_domains()
2552 				 * arbitrary cutoff based two factors -- SMT and  in build_sched_domains()
2553 				 * memory channels. For SMT-2, the intent is to  in build_sched_domains()
2555 				 * SMT-4 or SMT-8 *may* benefit from a different  in build_sched_domains()
2563 				 * on one node while LLCs on another node  in build_sched_domains()
2569 				nr_llcs = sd->span_weight / child->span_weight;  in build_sched_domains()
2571 					imb = sd->span_weight >> 3;  in build_sched_domains()
2575 				sd->imb_numa_nr = imb;  in build_sched_domains()
2578 				top_p = sd->parent;  in build_sched_domains()
2579 				while (top_p && !(top_p->flags & SD_NUMA)) {  in build_sched_domains()
2580 					top_p = top_p->parent;  in build_sched_domains()
2582 				imb_span = top_p ? top_p->span_weight : sd->span_weight;  in build_sched_domains()
2584 				int factor = max(1U, (sd->span_weight / imb_span));  in build_sched_domains()
2586 				sd->imb_numa_nr = imb * factor;  in build_sched_domains()
2592 	for (i = nr_cpumask_bits-1; i >= 0; i--) {  in build_sched_domains()
2596 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {  in build_sched_domains()
2805 		/* No match - a current sched domain not in new doms_new[] */  in partition_sched_domains_locked()
2826 		/* No match - add a new doms_new */  in partition_sched_domains_locked()
2837 			    cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) {  in partition_sched_domains_locked()
2842 		/* No match - add perf domains for a new rd */  in partition_sched_domains_locked()