Lines Matching +full:- +full:group

1 // SPDX-License-Identifier: GPL-2.0-only
15 #include "tick-internal.h"
22 * lowest level group contains CPUs, the next level contains groups of CPU groups
34 * GRP0:0 - GRP0:2 GRP0:3 - GRP0:5
37 * CPUS 0-7 8-15 16-23 24-31 32-39 40-47
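For orientation, a small standalone C sketch (illustrative only, not part of the kernel sources) of the arithmetic behind the layout above: with eight children per group, a CPU's group index at a given level follows from repeated division by the fan-out. The real setup additionally keeps low-level groups within one NUMA node, which this sketch ignores.

#include <stdio.h>

#define CHILDREN_PER_GROUP	8	/* fan-out assumed in the example above */

/* Index of the group containing @cpu at hierarchy level @lvl (LVL0 == 0) */
static unsigned int group_index(unsigned int cpu, unsigned int lvl)
{
	unsigned int idx = cpu;

	for (unsigned int i = 0; i <= lvl; i++)
		idx /= CHILDREN_PER_GROUP;

	return idx;
}

int main(void)
{
	/*
	 * CPU 21 is in the 16-23 range, i.e. GRP0:2, which sits below the
	 * first LVL1 group.
	 */
	printf("LVL0 group of CPU 21: %u\n", group_index(21, 0));	/* 2 */
	printf("LVL1 group of CPU 21: %u\n", group_index(21, 1));	/* 0 */
	return 0;
}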
43 * Each group has a designated migrator CPU/group as long as a CPU/group is
44 * active in the group. This designated role is necessary to prevent all
45 * active CPUs in a group from trying to migrate expired timers from other CPUs,
48 * When a CPU is awake, it checks in its own timer tick the group
53 * If it finds expired timers in one of the group queues it pulls them over
55 * group and the parent groups if required.
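Reduced to its essence, and written as a self-contained userspace model (the struct, field names and MODEL_NONE marker below are made-up illustrations, not the kernel's types), the per-tick decision is roughly:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define MODEL_NONE	0xff	/* "no migrator assigned" marker in this model */

struct model_group {
	_Atomic uint8_t		migrator;	/* child mask of the designated migrator */
	_Atomic uint64_t	next_expiry;	/* earliest event queued in this group */
};

/* Does child @childmask have to pull expired timers from @grp right now? */
static bool model_should_handle(struct model_group *grp, uint8_t childmask,
				uint64_t now)
{
	uint8_t migrator = atomic_load(&grp->migrator);

	/* Another active child is the migrator and takes care of it */
	if (migrator != childmask && migrator != MODEL_NONE)
		return false;

	return now >= atomic_load(&grp->next_expiry);
}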
60 * CPU does not queue an event in the LVL0 group. If the next migratable
62 * in the LVL0 group. In both cases the CPU marks itself idle in the LVL0
63 * group.
65 * When a CPU comes out of idle and when a group has at least a single active
73 * If the CPU is the migrator of the group then it delegates that role to
74 * the next active CPU in the group or sets migrator to TMIGR_NONE when
75 * there is no active CPU in the group. This delegation needs to be
86 * ---------------
89 * not destroyed when a group becomes empty due to offlining. The group
96 * --------------
99 * child and parent group. The lock ordering is always bottom up. This also
101 * active CPU/group information atomic_try_cmpxchg() is used instead and only
102 * the per CPU tmigr_cpu->lock is held.
107 * When @timer_base->lock as well as tmigr related locks are required, the lock
108 * ordering is: first @timer_base->lock, afterwards tmigr related locks.
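A minimal pthread sketch of the two ordering rules (bottom-up within the hierarchy, and timer_base->lock before tmigr locks). It models only the ordering, not the real kernel types; raw_spin_lock_nested() with SINGLE_DEPTH_NESTING is what the kernel code itself uses for the parent lock.

#include <pthread.h>

struct model_timer_base {
	pthread_mutex_t		lock;
};

struct model_group {
	pthread_mutex_t		lock;
	struct model_group	*parent;
};

/* Rule 1: bottom up - the child group's lock is taken before the parent's */
static void lock_child_then_parent(struct model_group *child)
{
	pthread_mutex_lock(&child->lock);
	if (child->parent)
		pthread_mutex_lock(&child->parent->lock);
}

static void unlock_parent_then_child(struct model_group *child)
{
	if (child->parent)
		pthread_mutex_unlock(&child->parent->lock);
	pthread_mutex_unlock(&child->lock);
}

/* Rule 2: when both are needed, timer_base->lock always comes first */
static void lock_base_then_groups(struct model_timer_base *base,
				  struct model_group *child)
{
	pthread_mutex_lock(&base->lock);
	lock_child_then_parent(child);
}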
111 * Protection of the tmigr group state information:
112 * ------------------------------------------------
117 * lockless and group wise. The following scenario describes what happens
135 * 1. CPU0 goes idle. As the update is performed group wise, in the first step
144 * --> migrator = TMIGR_NONE migrator = CPU2
145 * --> active = active = CPU2
148 * --> idle idle active idle
161 * --> migrator = CPU1 migrator = CPU2
162 * --> active = CPU1 active = CPU2
165 * idle --> active active idle
173 * --> migrator = GRP0:1
174 * --> active = GRP0:0, GRP0:1
186 * --> migrator = GRP0:1
187 * --> active = GRP0:1
202 * expected value (compare-and-exchange).
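The scheme described above boils down to a read/modify/compare-and-exchange loop over one packed word per group. A self-contained userspace model follows; the field layout is a simplifying assumption (the real packed state and its sequence counter are defined in timer_migration.h):

#include <stdatomic.h>
#include <stdint.h>

#define MODEL_NONE	0xff	/* "no migrator assigned" marker in this model */

/* Packed group state, updated locklessly as a single 32-bit word */
union model_state {
	uint32_t word;
	struct {
		uint8_t		active;		/* bitmask of active children */
		uint8_t		migrator;	/* child mask of the migrator or MODEL_NONE */
		uint16_t	seq;		/* bumped on every successful update */
	};
};

/* Child @childmask goes active and claims the migrator role if it is free */
static void model_active_up(_Atomic uint32_t *state, uint8_t childmask)
{
	union model_state cur, next;

	cur.word = atomic_load(state);
	do {
		next = cur;
		next.active |= childmask;
		if (next.migrator == MODEL_NONE)
			next.migrator = childmask;
		next.seq++;
		/* On failure, cur is refreshed with the value seen in memory */
	} while (!atomic_compare_exchange_weak(state, &cur.word, next.word));
}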
210 * ----------------------------------------------------------
213 * first global timer of an idle CPU, the group and child states have to be read
241 * migrator = TMIGR_NONE --> migrator = TMIGR_NONE
242 * active = --> active =
246 * idle idle --> idle idle
249 * child going idle in top level group, the expiry of the next group event
254 * --> migrator = TMIGR_NONE
255 * --> active =
264 * idle idle --> idle idle
277 * --> next_expiry = TIMER0 next_expiry = KTIME_MAX
286 * top level group.
291 * --> next_expiry = TIMER0
305 * -------------------------- ---------------------------
307 * cmpxchg(&GRP1:0->state);
309 * spin_lock(&GRP1:0->lock);
312 * spin_unlock(&GRP1:0->lock);
316 * spin_lock(&GRP1:0->lock)
318 * group_state = atomic_read(&GRP1:0->state)
321 * spin_unlock(&GRP1:0->lock) <3>
328 * update of the group state from the active path is no problem, as the upcoming CPU
329 * will take care of the group events.
332 * -----------------------------------------------------------
347 * --> timerqueue = evt-GRP0:0
354 * timerqueue = evt-CPU0, timerqueue =
355 * evt-CPU1
360 * 2. CPU2 starts to expire remote timers. It starts with LVL0 group
369 * --> timerqueue =
375 * --> groupevt.cpu = CPU0 groupevt.cpu =
376 * timerqueue = evt-CPU0, timerqueue =
377 * evt-CPU1
390 * in GRP0:0's timerqueue and therefore set in the CPU field of the group
397 * --> timerqueue = evt-GRP0:0
403 * --> groupevt.cpu = CPU1 groupevt.cpu =
404 * --> timerqueue = evt-CPU1 timerqueue =
414 * of the group as migrator and any needed updates within the hierarchy.
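As a reading aid for the walkthrough above, a simplified picture of the per-group event bookkeeping it refers to (types and sizes are illustrative assumptions; the real struct tmigr_group and struct tmigr_event live in timer_migration.h and use a timerqueue instead of a fixed array):

#include <stdbool.h>
#include <stdint.h>

/* One event: a CPU's first global timer or a child group's groupevt */
struct model_event {
	uint64_t	expires;
	unsigned int	cpu;	/* CPU that owns the underlying timer */
	bool		ignore;	/* stale, e.g. because the owner went active again */
};

struct model_group {
	/*
	 * Queue of the children's first events, ordered by expiry. The
	 * earliest expiry is mirrored in next_expiry for lockless readers.
	 */
	struct model_event	*queue[8];
	unsigned int		nr_queued;
	uint64_t		next_expiry;

	/*
	 * This group's own entry in the parent's queue. Its cpu field names
	 * the CPU whose timer currently is the group's first event
	 * (evt-GRP0:0 / groupevt.cpu in the diagrams above).
	 */
	struct model_event	groupevt;

	struct model_group	*parent;
};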
430 return !(tmc->tmgroup && tmc->online); in tmigr_is_not_available()
434 * Returns true when @childmask corresponds to the group migrator or when the
435 * group is not active - so no migrator is set.
437 static bool tmigr_check_migrator(struct tmigr_group *group, u8 childmask) in tmigr_check_migrator() argument
441 s.state = atomic_read(&group->migr_state); in tmigr_check_migrator()
449 static bool tmigr_check_migrator_and_lonely(struct tmigr_group *group, u8 childmask) in tmigr_check_migrator_and_lonely() argument
455 s.state = atomic_read(&group->migr_state); in tmigr_check_migrator_and_lonely()
466 static bool tmigr_check_lonely(struct tmigr_group *group) in tmigr_check_lonely() argument
471 s.state = atomic_read(&group->migr_state); in tmigr_check_lonely()
479 * struct tmigr_walk - data required for walking the hierarchy
489 * in top level group only. Be aware, there could occur a
492 * parent group in walk_groups(). Then @firstexp might
497 * child group)
498 * @childmask: groupmask of child group
528 struct tmigr_group *child = NULL, *group = tmc->tmgroup; in __walk_groups() local
531 WARN_ON_ONCE(group->level >= tmigr_hierarchy_levels); in __walk_groups()
533 if (up(group, child, data)) in __walk_groups()
536 child = group; in __walk_groups()
538 * Pairs with the store release on group connection in __walk_groups()
539 * to make sure group initialization is visible. in __walk_groups()
541 group = READ_ONCE(group->parent); in __walk_groups()
542 data->childmask = child->groupmask; in __walk_groups()
543 WARN_ON_ONCE(!data->childmask); in __walk_groups()
544 } while (group); in __walk_groups()
549 lockdep_assert_held(&tmc->lock); in walk_groups()
555 * Returns the next event of the timerqueue @group->events
557 * Removes timers with the ignore flag set and updates next_expiry of the group. Values
558 * of the group event are updated in tmigr_update_events() only.
560 static struct tmigr_event *tmigr_next_groupevt(struct tmigr_group *group) in tmigr_next_groupevt() argument
565 lockdep_assert_held(&group->lock); in tmigr_next_groupevt()
567 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_next_groupevt()
569 while ((node = timerqueue_getnext(&group->events))) { in tmigr_next_groupevt()
572 if (!READ_ONCE(evt->ignore)) { in tmigr_next_groupevt()
573 WRITE_ONCE(group->next_expiry, evt->nextevt.expires); in tmigr_next_groupevt()
578 * Remove next timers with ignore flag, because the group lock in tmigr_next_groupevt()
581 if (!timerqueue_del(&group->events, node)) in tmigr_next_groupevt()
593 static struct tmigr_event *tmigr_next_expired_groupevt(struct tmigr_group *group, in tmigr_next_expired_groupevt() argument
596 struct tmigr_event *evt = tmigr_next_groupevt(group); in tmigr_next_expired_groupevt()
598 if (!evt || now < evt->nextevt.expires) in tmigr_next_expired_groupevt()
602 * The event is ready to expire. Remove it and update the next group event. in tmigr_next_expired_groupevt()
604 timerqueue_del(&group->events, &evt->nextevt); in tmigr_next_expired_groupevt()
605 tmigr_next_groupevt(group); in tmigr_next_expired_groupevt()
610 static u64 tmigr_next_groupevt_expires(struct tmigr_group *group) in tmigr_next_groupevt_expires() argument
614 evt = tmigr_next_groupevt(group); in tmigr_next_groupevt_expires()
619 return evt->nextevt.expires; in tmigr_next_groupevt_expires()
622 static bool tmigr_active_up(struct tmigr_group *group, in tmigr_active_up() argument
630 childmask = data->childmask; in tmigr_active_up()
633 * tmigr_inactive_up(), as the group state change does not depend on the in tmigr_active_up()
636 curstate.state = atomic_read(&group->migr_state); in tmigr_active_up()
652 } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)); in tmigr_active_up()
654 trace_tmigr_group_set_cpu_active(group, newstate, childmask); in tmigr_active_up()
657 * The group is active (again). The group event might still be queued in tmigr_active_up()
658 * into the parent group's timerqueue but can now be handled by the in tmigr_active_up()
659 * migrator of this group. Therefore the ignore flag for the group event in tmigr_active_up()
663 * worst case the migrator of the parent group observes the change too in tmigr_active_up()
664 * late and expires remotely all events belonging to this group. The in tmigr_active_up()
668 WRITE_ONCE(group->groupevt.ignore, true); in tmigr_active_up()
677 data.childmask = tmc->groupmask; in __tmigr_cpu_activate()
681 tmc->cpuevt.ignore = true; in __tmigr_cpu_activate()
682 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in __tmigr_cpu_activate()
688 * tmigr_cpu_activate() - set this CPU active in timer migration hierarchy
699 if (WARN_ON_ONCE(!tmc->idle)) in tmigr_cpu_activate()
702 raw_spin_lock(&tmc->lock); in tmigr_cpu_activate()
703 tmc->idle = false; in tmigr_cpu_activate()
705 raw_spin_unlock(&tmc->lock); in tmigr_cpu_activate()
711 * @data->firstexp is set to the expiry of the first global event of the (top level of
714 * The child and group states need to be read under the lock, to prevent a race
719 * This is the only place where the group event expiry value is set.
722 bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child, in tmigr_update_events() argument
727 bool remote = data->remote; in tmigr_update_events()
733 raw_spin_lock(&child->lock); in tmigr_update_events()
734 raw_spin_lock_nested(&group->lock, SINGLE_DEPTH_NESTING); in tmigr_update_events()
736 childstate.state = atomic_read(&child->migr_state); in tmigr_update_events()
737 groupstate.state = atomic_read(&group->migr_state); in tmigr_update_events()
745 nextexp = child->next_expiry; in tmigr_update_events()
746 evt = &child->groupevt; in tmigr_update_events()
755 WRITE_ONCE(evt->ignore, ignore); in tmigr_update_events()
757 nextexp = data->nextexp; in tmigr_update_events()
759 first_childevt = evt = data->evt; in tmigr_update_events()
760 ignore = evt->ignore; in tmigr_update_events()
772 * - When entering this path by tmigr_new_timer(), @evt->ignore in tmigr_update_events()
774 * - tmigr_inactive_up() takes care of the propagation by in tmigr_update_events()
776 * return is possible if there is a parent, sparing group in tmigr_update_events()
779 * within the group and update next_expiry accordingly. in tmigr_update_events()
782 * single level so @group is the top level group, make sure the in tmigr_update_events()
783 * first event information of the group is updated properly and in tmigr_update_events()
786 if (ignore && !remote && group->parent) in tmigr_update_events()
789 raw_spin_lock(&group->lock); in tmigr_update_events()
792 groupstate.state = atomic_read(&group->migr_state); in tmigr_update_events()
796 * If the child event is already queued in the group, remove it from the in tmigr_update_events()
799 if (timerqueue_node_queued(&evt->nextevt)) { in tmigr_update_events()
800 if ((evt->nextevt.expires == nextexp) && !ignore) { in tmigr_update_events()
802 evt->cpu = first_childevt->cpu; in tmigr_update_events()
806 if (!timerqueue_del(&group->events, &evt->nextevt)) in tmigr_update_events()
807 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_update_events()
814 * the group is already active, there is no need to walk the in tmigr_update_events()
815 * hierarchy even if there is a parent group. in tmigr_update_events()
818 * if a remote timer handling was executed before and the group in tmigr_update_events()
820 * an enqueued timer in the non active group. The enqueued timer in tmigr_update_events()
821 * of the group needs to be propagated to a higher level to in tmigr_update_events()
827 evt->nextevt.expires = nextexp; in tmigr_update_events()
828 evt->cpu = first_childevt->cpu; in tmigr_update_events()
830 if (timerqueue_add(&group->events, &evt->nextevt)) in tmigr_update_events()
831 WRITE_ONCE(group->next_expiry, nextexp); in tmigr_update_events()
835 if (!group->parent && (groupstate.migrator == TMIGR_NONE)) { in tmigr_update_events()
840 * handling. The first timer in the top level group which needs to be in tmigr_update_events()
841 * handled when the top level group is not active is calculated in tmigr_update_events()
848 * The top level group is idle and it has to be ensured the in tmigr_update_events()
854 data->firstexp = tmigr_next_groupevt_expires(group); in tmigr_update_events()
857 trace_tmigr_update_events(child, group, childstate, groupstate, in tmigr_update_events()
861 raw_spin_unlock(&group->lock); in tmigr_update_events()
864 raw_spin_unlock(&child->lock); in tmigr_update_events()
869 static bool tmigr_new_timer_up(struct tmigr_group *group, in tmigr_new_timer_up() argument
873 return tmigr_update_events(group, child, data); in tmigr_new_timer_up()
885 .evt = &tmc->cpuevt }; in tmigr_new_timer()
887 lockdep_assert_held(&tmc->lock); in tmigr_new_timer()
889 if (tmc->remote) in tmigr_new_timer()
894 tmc->cpuevt.ignore = false; in tmigr_new_timer()
912 raw_spin_lock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
929 if (!tmc->online || tmc->remote || tmc->cpuevt.ignore || in tmigr_handle_remote_cpu()
930 now < tmc->cpuevt.nextevt.expires) { in tmigr_handle_remote_cpu()
931 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
937 tmc->remote = true; in tmigr_handle_remote_cpu()
938 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_handle_remote_cpu()
941 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
947 * Lock ordering needs to be preserved - timer_base locks before tmigr in tmigr_handle_remote_cpu()
949 * the top). During fetching the next timer interrupt, also tmc->lock in tmigr_handle_remote_cpu()
959 * the timerqueue and group events. in tmigr_handle_remote_cpu()
963 raw_spin_lock(&tmc->lock); in tmigr_handle_remote_cpu()
976 if (!tmc->online || !tmc->idle) { in tmigr_handle_remote_cpu()
987 data.evt = &tmc->cpuevt; in tmigr_handle_remote_cpu()
998 tmc->remote = false; in tmigr_handle_remote_cpu()
999 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
1002 static bool tmigr_handle_remote_up(struct tmigr_group *group, in tmigr_handle_remote_up() argument
1011 jif = data->basej; in tmigr_handle_remote_up()
1012 now = data->now; in tmigr_handle_remote_up()
1014 childmask = data->childmask; in tmigr_handle_remote_up()
1016 trace_tmigr_handle_remote(group); in tmigr_handle_remote_up()
1019 * Handle the group only if @childmask is the migrator or if the in tmigr_handle_remote_up()
1020 * group has no migrator. Otherwise the group is active and is in tmigr_handle_remote_up()
1023 if (!tmigr_check_migrator(group, childmask)) in tmigr_handle_remote_up()
1026 raw_spin_lock_irq(&group->lock); in tmigr_handle_remote_up()
1028 evt = tmigr_next_expired_groupevt(group, now); in tmigr_handle_remote_up()
1031 unsigned int remote_cpu = evt->cpu; in tmigr_handle_remote_up()
1033 raw_spin_unlock_irq(&group->lock); in tmigr_handle_remote_up()
1043 * (group->next_expiry was updated by tmigr_next_expired_groupevt(), in tmigr_handle_remote_up()
1046 data->firstexp = group->next_expiry; in tmigr_handle_remote_up()
1048 raw_spin_unlock_irq(&group->lock); in tmigr_handle_remote_up()
1054 * tmigr_handle_remote() - Handle global timers of remote idle CPUs
1066 data.childmask = tmc->groupmask; in tmigr_handle_remote()
1074 if (!tmigr_check_migrator(tmc->tmgroup, tmc->groupmask)) { in tmigr_handle_remote()
1080 if (READ_ONCE(tmc->wakeup) == KTIME_MAX) in tmigr_handle_remote()
1087 * Update @tmc->wakeup only at the end and do not reset @tmc->wakeup to in tmigr_handle_remote()
1088 * KTIME_MAX. Even if tmc->lock is not held during the whole remote in tmigr_handle_remote()
1089 * handling, tmc->wakeup is fine to be stale as it is called in in tmigr_handle_remote()
1096 raw_spin_lock_irq(&tmc->lock); in tmigr_handle_remote()
1097 WRITE_ONCE(tmc->wakeup, data.firstexp); in tmigr_handle_remote()
1098 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote()
1101 static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, in tmigr_requires_handle_remote_up() argument
1107 childmask = data->childmask; in tmigr_requires_handle_remote_up()
1110 * Handle the group only if the child is the migrator or if the group in tmigr_requires_handle_remote_up()
1111 * has no migrator. Otherwise the group is active and is handled by its in tmigr_requires_handle_remote_up()
1114 if (!tmigr_check_migrator(group, childmask)) in tmigr_requires_handle_remote_up()
1118 * When there is a parent group and the CPU which triggered the in tmigr_requires_handle_remote_up()
1120 * group before reading the next_expiry value. in tmigr_requires_handle_remote_up()
1122 if (group->parent && !data->tmc_active) in tmigr_requires_handle_remote_up()
1132 data->firstexp = READ_ONCE(group->next_expiry); in tmigr_requires_handle_remote_up()
1133 if (data->now >= data->firstexp) { in tmigr_requires_handle_remote_up()
1134 data->check = true; in tmigr_requires_handle_remote_up()
1138 raw_spin_lock(&group->lock); in tmigr_requires_handle_remote_up()
1139 data->firstexp = group->next_expiry; in tmigr_requires_handle_remote_up()
1140 if (data->now >= group->next_expiry) { in tmigr_requires_handle_remote_up()
1141 data->check = true; in tmigr_requires_handle_remote_up()
1142 raw_spin_unlock(&group->lock); in tmigr_requires_handle_remote_up()
1145 raw_spin_unlock(&group->lock); in tmigr_requires_handle_remote_up()
1152 * tmigr_requires_handle_remote() - Check the need of remote timer handling
1167 data.childmask = tmc->groupmask; in tmigr_requires_handle_remote()
1169 data.tmc_active = !tmc->idle; in tmigr_requires_handle_remote()
1176 * Check is done lockless as interrupts are disabled and @tmc->idle is in tmigr_requires_handle_remote()
1179 if (!tmc->idle) { in tmigr_requires_handle_remote()
1186 * When the CPU is idle, compare @tmc->wakeup with @data.now. The lock in tmigr_requires_handle_remote()
1192 if (data.now >= READ_ONCE(tmc->wakeup)) in tmigr_requires_handle_remote()
1195 raw_spin_lock(&tmc->lock); in tmigr_requires_handle_remote()
1196 if (data.now >= tmc->wakeup) in tmigr_requires_handle_remote()
1198 raw_spin_unlock(&tmc->lock); in tmigr_requires_handle_remote()
1205 * tmigr_cpu_new_timer() - enqueue next global timer into hierarchy (idle tmc)
1210 * and thereby the timer idle path is executed once more. @tmc->wakeup
1225 raw_spin_lock(&tmc->lock); in tmigr_cpu_new_timer()
1227 ret = READ_ONCE(tmc->wakeup); in tmigr_cpu_new_timer()
1229 if (nextexp != tmc->cpuevt.nextevt.expires || in tmigr_cpu_new_timer()
1230 tmc->cpuevt.ignore) { in tmigr_cpu_new_timer()
1236 WRITE_ONCE(tmc->wakeup, ret); in tmigr_cpu_new_timer()
1240 raw_spin_unlock(&tmc->lock); in tmigr_cpu_new_timer()
1244 static bool tmigr_inactive_up(struct tmigr_group *group, in tmigr_inactive_up() argument
1252 childmask = data->childmask; in tmigr_inactive_up()
1257 * to make sure the updates of child and group states are ordered. The in tmigr_inactive_up()
1258 * ordering is mandatory, as the group state change depends on the child in tmigr_inactive_up()
1261 curstate.state = atomic_read_acquire(&group->migr_state); in tmigr_inactive_up()
1265 childstate.state = atomic_read(&child->migr_state); in tmigr_inactive_up()
1276 * Find a new migrator for the group, because the child in tmigr_inactive_up()
1277 * group is idle! in tmigr_inactive_up()
1299 if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)) { in tmigr_inactive_up()
1300 trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); in tmigr_inactive_up()
1306 * tmigr_active_up() to make sure the updates of child and group in tmigr_inactive_up()
1313 data->remote = false; in tmigr_inactive_up()
1316 tmigr_update_events(group, child, data); in tmigr_inactive_up()
1325 .evt = &tmc->cpuevt, in __tmigr_cpu_deactivate()
1326 .childmask = tmc->groupmask }; in __tmigr_cpu_deactivate()
1334 tmc->cpuevt.ignore = false; in __tmigr_cpu_deactivate()
1341 * tmigr_cpu_deactivate() - Put current CPU into inactive state
1358 raw_spin_lock(&tmc->lock); in tmigr_cpu_deactivate()
1362 tmc->idle = true; in tmigr_cpu_deactivate()
1368 WRITE_ONCE(tmc->wakeup, ret); in tmigr_cpu_deactivate()
1371 raw_spin_unlock(&tmc->lock); in tmigr_cpu_deactivate()
1376 * tmigr_quick_check() - Quick forecast of next tmigr event when CPU wants to
1381 * * KTIME_MAX - when it is probable that nothing has to be done (the CPU is
1382 * not the only one in the level 0 group; or it is the only
1383 * one in the level 0 group, but more than a single group is
1384 * active on the way to the top level)
1385 * * nextevt - when the CPU is offline and has to handle the timer on its own
1386 * or when, on the way to the top, in every group only a single
1389 * * next_expiry - value of lowest expiry encountered while walking groups
1396 struct tmigr_group *group = tmc->tmgroup; in tmigr_quick_check() local
1401 if (WARN_ON_ONCE(tmc->idle)) in tmigr_quick_check()
1404 if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->groupmask)) in tmigr_quick_check()
1408 if (!tmigr_check_lonely(group)) in tmigr_quick_check()
1417 nextevt = min_t(u64, nextevt, READ_ONCE(group->next_expiry)); in tmigr_quick_check()
1418 group = group->parent; in tmigr_quick_check()
1419 } while (group); in tmigr_quick_check()
1425 * tmigr_trigger_active() - trigger a CPU to become active again
1435 WARN_ON_ONCE(!tmc->online || tmc->idle); in tmigr_trigger_active()
1446 raw_spin_lock_irq(&tmc->lock); in tmigr_cpu_offline()
1447 tmc->online = false; in tmigr_cpu_offline()
1448 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_cpu_offline()
1456 raw_spin_unlock_irq(&tmc->lock); in tmigr_cpu_offline()
1471 if (WARN_ON_ONCE(!tmc->tmgroup)) in tmigr_cpu_online()
1472 return -EINVAL; in tmigr_cpu_online()
1474 raw_spin_lock_irq(&tmc->lock); in tmigr_cpu_online()
1476 tmc->idle = timer_base_is_idle(); in tmigr_cpu_online()
1477 if (!tmc->idle) in tmigr_cpu_online()
1479 tmc->online = true; in tmigr_cpu_online()
1480 raw_spin_unlock_irq(&tmc->lock); in tmigr_cpu_online()
1484 static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl, in tmigr_init_group() argument
1489 raw_spin_lock_init(&group->lock); in tmigr_init_group()
1491 group->level = lvl; in tmigr_init_group()
1492 group->numa_node = lvl < tmigr_crossnode_level ? node : NUMA_NO_NODE; in tmigr_init_group()
1494 group->num_children = 0; in tmigr_init_group()
1499 atomic_set(&group->migr_state, s.state); in tmigr_init_group()
1502 * If this is a new top-level, prepare its groupmask in advance. in tmigr_init_group()
1503 * This avoids accidents where yet another new top-level is in tmigr_init_group()
1507 group->groupmask = BIT(0); in tmigr_init_group()
1513 group->num_children = 1; in tmigr_init_group()
1516 timerqueue_init_head(&group->events); in tmigr_init_group()
1517 timerqueue_init(&group->groupevt.nextevt); in tmigr_init_group()
1518 group->groupevt.nextevt.expires = KTIME_MAX; in tmigr_init_group()
1519 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_init_group()
1520 group->groupevt.ignore = true; in tmigr_init_group()
1526 struct tmigr_group *tmp, *group = NULL; in tmigr_get_group() local
1530 /* Try to attach to an existing group first */ in tmigr_get_group()
1534 * this group belongs to the same NUMA node. in tmigr_get_group()
1536 if (lvl < tmigr_crossnode_level && tmp->numa_node != node) in tmigr_get_group()
1540 if (tmp->num_children >= TMIGR_CHILDREN_PER_GROUP) in tmigr_get_group()
1545 * siblings end up in the same group of the lowest level of the in tmigr_get_group()
1550 group = tmp; in tmigr_get_group()
1554 if (group) in tmigr_get_group()
1555 return group; in tmigr_get_group()
1557 /* Allocate and set up a new group */ in tmigr_get_group()
1558 group = kzalloc_node(sizeof(*group), GFP_KERNEL, node); in tmigr_get_group()
1559 if (!group) in tmigr_get_group()
1560 return ERR_PTR(-ENOMEM); in tmigr_get_group()
1562 tmigr_init_group(group, lvl, node); in tmigr_get_group()
1565 list_add(&group->list, &tmigr_level_list[lvl]); in tmigr_get_group()
1566 trace_tmigr_group_set(group); in tmigr_get_group()
1567 return group; in tmigr_get_group()
1576 raw_spin_lock_irq(&child->lock); in tmigr_connect_child_parent()
1577 raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING); in tmigr_connect_child_parent()
1582 * case groupmask is pre-initialized and @child already in tmigr_connect_child_parent()
1586 WARN_ON_ONCE(child->groupmask != BIT(0) || parent->num_children != 2); in tmigr_connect_child_parent()
1589 child->groupmask = BIT(parent->num_children++); in tmigr_connect_child_parent()
1597 smp_store_release(&child->parent, parent); in tmigr_connect_child_parent()
1599 raw_spin_unlock(&parent->lock); in tmigr_connect_child_parent()
1600 raw_spin_unlock_irq(&child->lock); in tmigr_connect_child_parent()
1610 * in the parent group: in tmigr_connect_child_parent()
1617 * * But if a new group above the current top level is required, it is in tmigr_connect_child_parent()
1620 * executed with the formerly top level group (child) and the newly in tmigr_connect_child_parent()
1621 * created group (parent). in tmigr_connect_child_parent()
1627 * level group. And the next time it goes inactive, it will release in tmigr_connect_child_parent()
1631 data.childmask = child->groupmask; in tmigr_connect_child_parent()
1639 WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); in tmigr_connect_child_parent()
1644 struct tmigr_group *group, *child, **stack; in tmigr_setup_groups() local
1650 return -ENOMEM; in tmigr_setup_groups()
1653 group = tmigr_get_group(cpu, node, i); in tmigr_setup_groups()
1654 if (IS_ERR(group)) { in tmigr_setup_groups()
1655 err = PTR_ERR(group); in tmigr_setup_groups()
1660 stack[i++] = group; in tmigr_setup_groups()
1668 * single group. in tmigr_setup_groups()
1670 if (group->parent || list_is_singular(&tmigr_level_list[i - 1])) in tmigr_setup_groups()
1676 WARN_ON_ONCE(!err && !group->parent && !list_is_singular(&tmigr_level_list[top])); in tmigr_setup_groups()
1679 group = stack[--i]; in tmigr_setup_groups()
1682 list_del(&group->list); in tmigr_setup_groups()
1683 kfree(group); in tmigr_setup_groups()
1687 WARN_ON_ONCE(i != group->level); in tmigr_setup_groups()
1690 * Update tmc -> group / child -> group connection in tmigr_setup_groups()
1695 raw_spin_lock_irq(&group->lock); in tmigr_setup_groups()
1697 tmc->tmgroup = group; in tmigr_setup_groups()
1698 tmc->groupmask = BIT(group->num_children++); in tmigr_setup_groups()
1700 raw_spin_unlock_irq(&group->lock); in tmigr_setup_groups()
1707 child = stack[i - 1]; in tmigr_setup_groups()
1709 tmigr_connect_child_parent(child, group, false); in tmigr_setup_groups()
1722 * CPU's child group and pre-accounted the old root. in tmigr_setup_groups()
1724 if (group->num_children == 2 && list_is_singular(lvllist)) { in tmigr_setup_groups()
1728 * it may spuriously activate the old top level group inside in tmigr_setup_groups()
1729 * the new one (regardless of whether the old top level group is in tmigr_setup_groups()
1734 lvllist = &tmigr_level_list[top - 1]; in tmigr_setup_groups()
1736 if (child->parent) in tmigr_setup_groups()
1739 tmigr_connect_child_parent(child, group, true); in tmigr_setup_groups()
1767 if (tmc->tmgroup) in tmigr_cpu_prepare()
1770 raw_spin_lock_init(&tmc->lock); in tmigr_cpu_prepare()
1771 timerqueue_init(&tmc->cpuevt.nextevt); in tmigr_cpu_prepare()
1772 tmc->cpuevt.nextevt.expires = KTIME_MAX; in tmigr_cpu_prepare()
1773 tmc->cpuevt.ignore = true; in tmigr_cpu_prepare()
1774 tmc->cpuevt.cpu = cpu; in tmigr_cpu_prepare()
1775 tmc->remote = false; in tmigr_cpu_prepare()
1776 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_cpu_prepare()
1782 if (tmc->groupmask == 0) in tmigr_cpu_prepare()
1783 return -EINVAL; in tmigr_cpu_prepare()
1793 int ret = -ENOMEM; in tmigr_init()
1824 * If a NUMA node spawns more than one CPU level group then the next in tmigr_init()
1839 pr_info("Timer migration: %d hierarchy levels; %d children per group;" in tmigr_init()