Lines Matching +full:- +full:group

1 // SPDX-License-Identifier: GPL-2.0-only
15 #include "tick-internal.h"
22 * lowest level group contains CPUs, the next level contains groups of CPU groups
34 * GRP0:0 - GRP0:2 GRP0:3 - GRP0:5
37 * CPUS 0-7 8-15 16-23 24-31 32-39 40-47
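For orientation, a small standalone C sketch (illustrative only, not part of the kernel sources) of the arithmetic behind the layout above: with eight children per group, a CPU's group index at a given level follows from repeated division by the fan-out. The real setup additionally keeps low-level groups within one NUMA node, which this sketch ignores.

#include <stdio.h>

#define CHILDREN_PER_GROUP	8	/* fan-out assumed in the example above */

/* Index of the group containing @cpu at hierarchy level @lvl (LVL0 == 0) */
static unsigned int group_index(unsigned int cpu, unsigned int lvl)
{
	unsigned int idx = cpu;

	for (unsigned int i = 0; i <= lvl; i++)
		idx /= CHILDREN_PER_GROUP;

	return idx;
}

int main(void)
{
	/*
	 * CPU 21 is in the 16-23 range, i.e. GRP0:2, which sits below the
	 * first LVL1 group.
	 */
	printf("LVL0 group of CPU 21: %u\n", group_index(21, 0));	/* 2 */
	printf("LVL1 group of CPU 21: %u\n", group_index(21, 1));	/* 0 */
	return 0;
}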
43 * Each group has a designated migrator CPU/group as long as a CPU/group is
44 * active in the group. This designated role is necessary to prevent all
45 * active CPUs in a group from trying to migrate expired timers from other CPUs,
48 * When a CPU is awake, it checks in its own timer tick the group
53 * If it finds expired timers in one of the group queues it pulls them over
55 * group and the parent groups if required.
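Reduced to its essence, and written as a self-contained userspace model (the struct, field names and MODEL_NONE marker below are made-up illustrations, not the kernel's types), the per-tick decision is roughly:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define MODEL_NONE	0xff	/* "no migrator assigned" marker in this model */

struct model_group {
	_Atomic uint8_t		migrator;	/* child mask of the designated migrator */
	_Atomic uint64_t	next_expiry;	/* earliest event queued in this group */
};

/* Does child @childmask have to pull expired timers from @grp right now? */
static bool model_should_handle(struct model_group *grp, uint8_t childmask,
				uint64_t now)
{
	uint8_t migrator = atomic_load(&grp->migrator);

	/* Another active child is the migrator and takes care of it */
	if (migrator != childmask && migrator != MODEL_NONE)
		return false;

	return now >= atomic_load(&grp->next_expiry);
}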
60 * CPU does not queue an event in the LVL0 group. If the next migratable
62 * in the LVL0 group. In both cases the CPU marks itself idle in the LVL0
63 * group.
65 * When a CPU comes out of idle and when a group has at least a single active
73 * If the CPU is the migrator of the group then it delegates that role to
74 * the next active CPU in the group or sets migrator to TMIGR_NONE when
75 * there is no active CPU in the group. This delegation needs to be
86 * ---------------
89 * not destroyed when a group becomes empty due to offlining. The group
96 * --------------
99 * child and parent group. The lock ordering is always bottom up. This also
101 * active CPU/group information atomic_try_cmpxchg() is used instead and only
102 * the per CPU tmigr_cpu->lock is held.
107 * When @timer_base->lock as well as tmigr related locks are required, the lock
108 * ordering is: first @timer_base->lock, afterwards tmigr related locks.
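A minimal pthread sketch of the two ordering rules (bottom-up within the hierarchy, and timer_base->lock before tmigr locks). It models only the ordering, not the real kernel types; raw_spin_lock_nested() with SINGLE_DEPTH_NESTING is what the kernel code itself uses for the parent lock.

#include <pthread.h>

struct model_timer_base {
	pthread_mutex_t		lock;
};

struct model_group {
	pthread_mutex_t		lock;
	struct model_group	*parent;
};

/* Rule 1: bottom up - the child group's lock is taken before the parent's */
static void lock_child_then_parent(struct model_group *child)
{
	pthread_mutex_lock(&child->lock);
	if (child->parent)
		pthread_mutex_lock(&child->parent->lock);
}

static void unlock_parent_then_child(struct model_group *child)
{
	if (child->parent)
		pthread_mutex_unlock(&child->parent->lock);
	pthread_mutex_unlock(&child->lock);
}

/* Rule 2: when both are needed, timer_base->lock always comes first */
static void lock_base_then_groups(struct model_timer_base *base,
				  struct model_group *child)
{
	pthread_mutex_lock(&base->lock);
	lock_child_then_parent(child);
}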
111 * Protection of the tmigr group state information:
112 * ------------------------------------------------
117 * lockless and group wise. The following scenario describes what happens
135 * 1. CPU0 goes idle. As the update is performed group wise, in the first step
144 * --> migrator = TMIGR_NONE migrator = CPU2
145 * --> active = active = CPU2
148 * --> idle idle active idle
161 * --> migrator = CPU1 migrator = CPU2
162 * --> active = CPU1 active = CPU2
165 * idle --> active active idle
173 * --> migrator = GRP0:1
174 * --> active = GRP0:0, GRP0:1
186 * --> migrator = GRP0:1
187 * --> active = GRP0:1
202 * expected value (compare-and-exchange).
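The scheme described above boils down to a read/modify/compare-and-exchange loop over one packed word per group. A self-contained userspace model follows; the field layout is a simplifying assumption (the real packed state and its sequence counter are defined in timer_migration.h):

#include <stdatomic.h>
#include <stdint.h>

#define MODEL_NONE	0xff	/* "no migrator assigned" marker in this model */

/* Packed group state, updated locklessly as a single 32-bit word */
union model_state {
	uint32_t word;
	struct {
		uint8_t		active;		/* bitmask of active children */
		uint8_t		migrator;	/* child mask of the migrator or MODEL_NONE */
		uint16_t	seq;		/* bumped on every successful update */
	};
};

/* Child @childmask goes active and claims the migrator role if it is free */
static void model_active_up(_Atomic uint32_t *state, uint8_t childmask)
{
	union model_state cur, next;

	cur.word = atomic_load(state);
	do {
		next = cur;
		next.active |= childmask;
		if (next.migrator == MODEL_NONE)
			next.migrator = childmask;
		next.seq++;
		/* On failure, cur is refreshed with the value seen in memory */
	} while (!atomic_compare_exchange_weak(state, &cur.word, next.word));
}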
210 * ----------------------------------------------------------
213 * first global timer of an idle CPU, the group and child states have to be read
241 * migrator = TMIGR_NONE --> migrator = TMIGR_NONE
242 * active = --> active =
246 * idle idle --> idle idle
249 * child going idle in top level group, the expiry of the next group event
254 * --> migrator = TMIGR_NONE
255 * --> active =
264 * idle idle --> idle idle
277 * --> next_expiry = TIMER0 next_expiry = KTIME_MAX
286 * top level group.
291 * --> next_expiry = TIMER0
305 * -------------------------- ---------------------------
307 * cmpxchg(&GRP1:0->state);
309 * spin_lock(&GRP1:0->lock);
312 * spin_unlock(&GRP1:0->lock);
316 * spin_lock(&GRP1:0->lock)
318 * group_state = atomic_read(&GRP1:0->state)
321 * spin_unlock(&GRP1:0->lock) <3>
328 * update of the group state from the active path is no problem, as the upcoming CPU
329 * will take care of the group events.
332 * -----------------------------------------------------------
347 * --> timerqueue = evt-GRP0:0
354 * timerqueue = evt-CPU0, timerqueue =
355 * evt-CPU1
360 * 2. CPU2 starts to expire remote timers. It starts with LVL0 group
369 * --> timerqueue =
375 * --> groupevt.cpu = CPU0 groupevt.cpu =
376 * timerqueue = evt-CPU0, timerqueue =
377 * evt-CPU1
390 * in GRP0:0's timerqueue and therefore set in the CPU field of the group
397 * --> timerqueue = evt-GRP0:0
403 * --> groupevt.cpu = CPU1 groupevt.cpu =
404 * --> timerqueue = evt-CPU1 timerqueue =
414 * of the group as migrator and any needed updates within the hierarchy.
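As a reading aid for the walkthrough above, a simplified picture of the per-group event bookkeeping it refers to (types and sizes are illustrative assumptions; the real struct tmigr_group and struct tmigr_event live in timer_migration.h and use a timerqueue instead of a fixed array):

#include <stdbool.h>
#include <stdint.h>

/* One event: a CPU's first global timer or a child group's groupevt */
struct model_event {
	uint64_t	expires;
	unsigned int	cpu;	/* CPU that owns the underlying timer */
	bool		ignore;	/* stale, e.g. because the owner went active again */
};

struct model_group {
	/*
	 * Queue of the children's first events, ordered by expiry. The
	 * earliest expiry is mirrored in next_expiry for lockless readers.
	 */
	struct model_event	*queue[8];
	unsigned int		nr_queued;
	uint64_t		next_expiry;

	/*
	 * This group's own entry in the parent's queue. Its cpu field names
	 * the CPU whose timer currently is the group's first event
	 * (evt-GRP0:0 / groupevt.cpu in the diagrams above).
	 */
	struct model_event	groupevt;

	struct model_group	*parent;
};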
430 return !(tmc->tmgroup && tmc->online); in tmigr_is_not_available()
434 * Returns true when @childmask corresponds to the group migrator or when the
435 * group is not active - so no migrator is set.
437 static bool tmigr_check_migrator(struct tmigr_group *group, u8 childmask) in tmigr_check_migrator() argument
441 s.state = atomic_read(&group->migr_state); in tmigr_check_migrator()
449 static bool tmigr_check_migrator_and_lonely(struct tmigr_group *group, u8 childmask) in tmigr_check_migrator_and_lonely() argument
455 s.state = atomic_read(&group->migr_state); in tmigr_check_migrator_and_lonely()
466 static bool tmigr_check_lonely(struct tmigr_group *group) in tmigr_check_lonely() argument
471 s.state = atomic_read(&group->migr_state); in tmigr_check_lonely()
479 * struct tmigr_walk - data required for walking the hierarchy
489 * in top level group only. Be aware, there could occur a
492 * parent group in walk_groups(). Then @firstexp might
497 * child group)
498 * @childmask: groupmask of child group
528 struct tmigr_group *child = NULL, *group = tmc->tmgroup; in __walk_groups() local
531 WARN_ON_ONCE(group->level >= tmigr_hierarchy_levels); in __walk_groups()
533 if (up(group, child, data)) in __walk_groups()
536 child = group; in __walk_groups()
538 * Pairs with the store release on group connection in __walk_groups()
539 * to make sure group initialization is visible. in __walk_groups()
541 group = READ_ONCE(group->parent); in __walk_groups()
542 data->childmask = child->groupmask; in __walk_groups()
543 WARN_ON_ONCE(!data->childmask); in __walk_groups()
544 } while (group); in __walk_groups()
549 lockdep_assert_held(&tmc->lock); in walk_groups()
555 * Returns the next event of the timerqueue @group->events
557 * Removes timers with the ignore flag set and updates next_expiry of the group. Values
558 * of the group event are updated in tmigr_update_events() only.
560 static struct tmigr_event *tmigr_next_groupevt(struct tmigr_group *group) in tmigr_next_groupevt() argument
565 lockdep_assert_held(&group->lock); in tmigr_next_groupevt()
567 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_next_groupevt()
569 while ((node = timerqueue_getnext(&group->events))) { in tmigr_next_groupevt()
572 if (!READ_ONCE(evt->ignore)) { in tmigr_next_groupevt()
573 WRITE_ONCE(group->next_expiry, evt->nextevt.expires); in tmigr_next_groupevt()
578 * Remove next timers with ignore flag, because the group lock in tmigr_next_groupevt()
581 if (!timerqueue_del(&group->events, node)) in tmigr_next_groupevt()
593 static struct tmigr_event *tmigr_next_expired_groupevt(struct tmigr_group *group, in tmigr_next_expired_groupevt() argument
596 struct tmigr_event *evt = tmigr_next_groupevt(group); in tmigr_next_expired_groupevt()
598 if (!evt || now < evt->nextevt.expires) in tmigr_next_expired_groupevt()
602 * The event is ready to expire. Remove it and update the next group event. in tmigr_next_expired_groupevt()
604 timerqueue_del(&group->events, &evt->nextevt); in tmigr_next_expired_groupevt()
605 tmigr_next_groupevt(group); in tmigr_next_expired_groupevt()
610 static u64 tmigr_next_groupevt_expires(struct tmigr_group *group) in tmigr_next_groupevt_expires() argument
614 evt = tmigr_next_groupevt(group); in tmigr_next_groupevt_expires()
619 return evt->nextevt.expires; in tmigr_next_groupevt_expires()
622 static bool tmigr_active_up(struct tmigr_group *group, in tmigr_active_up() argument
630 childmask = data->childmask; in tmigr_active_up()
633 * tmigr_inactive_up(), as the group state change does not depend on the in tmigr_active_up()
636 curstate.state = atomic_read(&group->migr_state); in tmigr_active_up()
652 } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)); in tmigr_active_up()
654 trace_tmigr_group_set_cpu_active(group, newstate, childmask); in tmigr_active_up()
657 * The group is active (again). The group event might still be queued in tmigr_active_up()
658 * into the parent group's timerqueue but can now be handled by the in tmigr_active_up()
659 * migrator of this group. Therefore the ignore flag for the group event in tmigr_active_up()
663 * worst case the migrator of the parent group observes the change too in tmigr_active_up()
664 * late and expires remotely all events belonging to this group. The in tmigr_active_up()
668 WRITE_ONCE(group->groupevt.ignore, true); in tmigr_active_up()
677 data.childmask = tmc->groupmask; in __tmigr_cpu_activate()
681 tmc->cpuevt.ignore = true; in __tmigr_cpu_activate()
682 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in __tmigr_cpu_activate()
688 * tmigr_cpu_activate() - set this CPU active in timer migration hierarchy
699 if (WARN_ON_ONCE(!tmc->idle)) in tmigr_cpu_activate()
702 raw_spin_lock(&tmc->lock); in tmigr_cpu_activate()
703 tmc->idle = false; in tmigr_cpu_activate()
705 raw_spin_unlock(&tmc->lock); in tmigr_cpu_activate()
711 * @data->firstexp is set to the expiry of the first global event of the (top level of
714 * The child and group states need to be read under the lock, to prevent a race
719 * This is the only place where the group event expiry value is set.
722 bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child, in tmigr_update_events() argument
727 bool remote = data->remote; in tmigr_update_events()
733 raw_spin_lock(&child->lock); in tmigr_update_events()
734 raw_spin_lock_nested(&group->lock, SINGLE_DEPTH_NESTING); in tmigr_update_events()
736 childstate.state = atomic_read(&child->migr_state); in tmigr_update_events()
737 groupstate.state = atomic_read(&group->migr_state); in tmigr_update_events()
745 nextexp = child->next_expiry; in tmigr_update_events()
746 evt = &child->groupevt; in tmigr_update_events()
755 WRITE_ONCE(evt->ignore, ignore); in tmigr_update_events()
757 nextexp = data->nextexp; in tmigr_update_events()
759 first_childevt = evt = data->evt; in tmigr_update_events()
760 ignore = evt->ignore; in tmigr_update_events()
772 * - When entering this path by tmigr_new_timer(), @evt->ignore in tmigr_update_events()
774 * - tmigr_inactive_up() takes care of the propagation by in tmigr_update_events()
776 * return is possible if there is a parent, sparing group in tmigr_update_events()
779 * within the group and update next_expiry accordingly. in tmigr_update_events()
782 * single level so @group is the top level group, make sure the in tmigr_update_events()
783 * first event information of the group is updated properly and in tmigr_update_events()
786 if (ignore && !remote && group->parent) in tmigr_update_events()
789 raw_spin_lock(&group->lock); in tmigr_update_events()
792 groupstate.state = atomic_read(&group->migr_state); in tmigr_update_events()
796 * If the child event is already queued in the group, remove it from the in tmigr_update_events()
799 if (timerqueue_node_queued(&evt->nextevt)) { in tmigr_update_events()
800 if ((evt->nextevt.expires == nextexp) && !ignore) { in tmigr_update_events()
802 evt->cpu = first_childevt->cpu; in tmigr_update_events()
806 if (!timerqueue_del(&group->events, &evt->nextevt)) in tmigr_update_events()
807 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_update_events()
814 * the group is already active, there is no need to walk the in tmigr_update_events()
815 * hierarchy even if there is a parent group. in tmigr_update_events()
818 * if a remote timer handling was executed before and the group in tmigr_update_events()
820 * an enqueued timer in the non active group. The enqueued timer in tmigr_update_events()
821 * of the group needs to be propagated to a higher level to in tmigr_update_events()
827 evt->nextevt.expires = nextexp; in tmigr_update_events()
828 evt->cpu = first_childevt->cpu; in tmigr_update_events()
830 if (timerqueue_add(&group->events, &evt->nextevt)) in tmigr_update_events()
831 WRITE_ONCE(group->next_expiry, nextexp); in tmigr_update_events()
835 if (!group->parent && (groupstate.migrator == TMIGR_NONE)) { in tmigr_update_events()
840 * handling. The first timer in the top level group which needs to be in tmigr_update_events()
841 * handled when the top level group is not active is calculated in tmigr_update_events()
848 * The top level group is idle and it has to be ensured the in tmigr_update_events()
854 data->firstexp = tmigr_next_groupevt_expires(group); in tmigr_update_events()
857 trace_tmigr_update_events(child, group, childstate, groupstate, in tmigr_update_events()
861 raw_spin_unlock(&group->lock); in tmigr_update_events()
864 raw_spin_unlock(&child->lock); in tmigr_update_events()
869 static bool tmigr_new_timer_up(struct tmigr_group *group, in tmigr_new_timer_up() argument
873 return tmigr_update_events(group, child, data); in tmigr_new_timer_up()
885 .evt = &tmc->cpuevt }; in tmigr_new_timer()
887 lockdep_assert_held(&tmc->lock); in tmigr_new_timer()
889 if (tmc->remote) in tmigr_new_timer()
894 tmc->cpuevt.ignore = false; in tmigr_new_timer()
912 raw_spin_lock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
929 if (!tmc->online || tmc->remote || tmc->cpuevt.ignore || in tmigr_handle_remote_cpu()
930 now < tmc->cpuevt.nextevt.expires) { in tmigr_handle_remote_cpu()
931 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
937 tmc->remote = true; in tmigr_handle_remote_cpu()
938 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_handle_remote_cpu()
941 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
947 * Lock ordering needs to be preserved - timer_base locks before tmigr in tmigr_handle_remote_cpu()
949 * the top). During fetching the next timer interrupt, also tmc->lock in tmigr_handle_remote_cpu()
959 * the timerqueue and group events. in tmigr_handle_remote_cpu()
963 raw_spin_lock(&tmc->lock); in tmigr_handle_remote_cpu()
976 if (!tmc->online || !tmc->idle) { in tmigr_handle_remote_cpu()
987 data.evt = &tmc->cpuevt; in tmigr_handle_remote_cpu()
998 tmc->remote = false; in tmigr_handle_remote_cpu()
999 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
1002 static bool tmigr_handle_remote_up(struct tmigr_group *group, in tmigr_handle_remote_up() argument
1011 jif = data->basej; in tmigr_handle_remote_up()
1012 now = data->now; in tmigr_handle_remote_up()
1014 childmask = data->childmask; in tmigr_handle_remote_up()
1016 trace_tmigr_handle_remote(group); in tmigr_handle_remote_up()
1019 * Handle the group only if @childmask is the migrator or if the in tmigr_handle_remote_up()
1020 * group has no migrator. Otherwise the group is active and is in tmigr_handle_remote_up()
1023 if (!tmigr_check_migrator(group, childmask)) in tmigr_handle_remote_up()
1026 raw_spin_lock_irq(&group->lock); in tmigr_handle_remote_up()
1028 evt = tmigr_next_expired_groupevt(group, now); in tmigr_handle_remote_up()
1031 unsigned int remote_cpu = evt->cpu; in tmigr_handle_remote_up()
1033 raw_spin_unlock_irq(&group->lock); in tmigr_handle_remote_up()
1043 * (group->next_expiry was updated by tmigr_next_expired_groupevt(), in tmigr_handle_remote_up()
1046 data->firstexp = group->next_expiry; in tmigr_handle_remote_up()
1048 raw_spin_unlock_irq(&group->lock); in tmigr_handle_remote_up()
1054 * tmigr_handle_remote() - Handle global timers of remote idle CPUs
1066 data.childmask = tmc->groupmask; in tmigr_handle_remote()
1074 if (!tmigr_check_migrator(tmc->tmgroup, tmc->groupmask)) { in tmigr_handle_remote()
1080 if (READ_ONCE(tmc->wakeup) == KTIME_MAX) in tmigr_handle_remote()
1087 * Update @tmc->wakeup only at the end and do not reset @tmc->wakeup to in tmigr_handle_remote()
1088 * KTIME_MAX. Even if tmc->lock is not held during the whole remote in tmigr_handle_remote()
1089 * handling, tmc->wakeup is fine to be stale as it is called in in tmigr_handle_remote()
1096 raw_spin_lock_irq(&tmc->lock); in tmigr_handle_remote()
1097 WRITE_ONCE(tmc->wakeup, data.firstexp); in tmigr_handle_remote()
1098 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote()
1101 static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, in tmigr_requires_handle_remote_up() argument
1107 childmask = data->childmask; in tmigr_requires_handle_remote_up()
1110 * Handle the group only if the child is the migrator or if the group in tmigr_requires_handle_remote_up()
1111 * has no migrator. Otherwise the group is active and is handled by its in tmigr_requires_handle_remote_up()
1114 if (!tmigr_check_migrator(group, childmask)) in tmigr_requires_handle_remote_up()
1118 * When there is a parent group and the CPU which triggered the in tmigr_requires_handle_remote_up()
1120 * group before reading the next_expiry value. in tmigr_requires_handle_remote_up()
1122 if (group->parent && !data->tmc_active) in tmigr_requires_handle_remote_up()
1132 data->firstexp = READ_ONCE(group->next_expiry); in tmigr_requires_handle_remote_up()
1133 if (data->now >= data->firstexp) { in tmigr_requires_handle_remote_up()
1134 data->check = true; in tmigr_requires_handle_remote_up()
1138 raw_spin_lock(&group->lock); in tmigr_requires_handle_remote_up()
1139 data->firstexp = group->next_expiry; in tmigr_requires_handle_remote_up()
1140 if (data->now >= group->next_expiry) { in tmigr_requires_handle_remote_up()
1141 data->check = true; in tmigr_requires_handle_remote_up()
1142 raw_spin_unlock(&group->lock); in tmigr_requires_handle_remote_up()
1145 raw_spin_unlock(&group->lock); in tmigr_requires_handle_remote_up()
1152 * tmigr_requires_handle_remote() - Check the need of remote timer handling
1167 data.childmask = tmc->groupmask; in tmigr_requires_handle_remote()
1169 data.tmc_active = !tmc->idle; in tmigr_requires_handle_remote()
1176 * Check is done lockless as interrupts are disabled and @tmc->idle is in tmigr_requires_handle_remote()
1179 if (!tmc->idle) { in tmigr_requires_handle_remote()
1186 * When the CPU is idle, compare @tmc->wakeup with @data.now. The lock in tmigr_requires_handle_remote()
1192 if (data.now >= READ_ONCE(tmc->wakeup)) in tmigr_requires_handle_remote()
1195 raw_spin_lock(&tmc->lock); in tmigr_requires_handle_remote()
1196 if (data.now >= tmc->wakeup) in tmigr_requires_handle_remote()
1198 raw_spin_unlock(&tmc->lock); in tmigr_requires_handle_remote()
1205 * tmigr_cpu_new_timer() - enqueue next global timer into hierarchy (idle tmc)
1210 * and thereby the timer idle path is executed once more. @tmc->wakeup
1225 raw_spin_lock(&tmc->lock); in tmigr_cpu_new_timer()
1227 ret = READ_ONCE(tmc->wakeup); in tmigr_cpu_new_timer()
1229 if (nextexp != tmc->cpuevt.nextevt.expires || in tmigr_cpu_new_timer()
1230 tmc->cpuevt.ignore) { in tmigr_cpu_new_timer()
1236 WRITE_ONCE(tmc->wakeup, ret); in tmigr_cpu_new_timer()
1240 raw_spin_unlock(&tmc->lock); in tmigr_cpu_new_timer()
1244 static bool tmigr_inactive_up(struct tmigr_group *group, in tmigr_inactive_up() argument
1252 childmask = data->childmask; in tmigr_inactive_up()
1257 * to make sure the updates of child and group states are ordered. The in tmigr_inactive_up()
1258 * ordering is mandatory, as the group state change depends on the child in tmigr_inactive_up()
1261 curstate.state = atomic_read_acquire(&group->migr_state); in tmigr_inactive_up()
1265 childstate.state = atomic_read(&child->migr_state); in tmigr_inactive_up()
1276 * Find a new migrator for the group, because the child in tmigr_inactive_up()
1277 * group is idle! in tmigr_inactive_up()
1299 if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)) { in tmigr_inactive_up()
1300 trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); in tmigr_inactive_up()
1306 * tmigr_active_up() to make sure the updates of child and group in tmigr_inactive_up()
1313 data->remote = false; in tmigr_inactive_up()
1316 tmigr_update_events(group, child, data); in tmigr_inactive_up()
1325 .evt = &tmc->cpuevt, in __tmigr_cpu_deactivate()
1326 .childmask = tmc->groupmask }; in __tmigr_cpu_deactivate()
1334 tmc->cpuevt.ignore = false; in __tmigr_cpu_deactivate()
1341 * tmigr_cpu_deactivate() - Put current CPU into inactive state
1358 raw_spin_lock(&tmc->lock); in tmigr_cpu_deactivate()
1362 tmc->idle = true; in tmigr_cpu_deactivate()
1368 WRITE_ONCE(tmc->wakeup, ret); in tmigr_cpu_deactivate()
1371 raw_spin_unlock(&tmc->lock); in tmigr_cpu_deactivate()
1376 * tmigr_quick_check() - Quick forecast of next tmigr event when CPU wants to
1381 * * KTIME_MAX - when it is probable that nothing has to be done (the CPU is
1382 * not the only one in the level 0 group; or it is the only
1383 * one in the level 0 group, but more than a single group is
1384 * active on the way to the top level)
1385 * * nextevt - when the CPU is offline and has to handle the timer on its own
1386 * or when, on the way to the top, in every group only a single
1389 * * next_expiry - value of lowest expiry encountered while walking groups
1396 struct tmigr_group *group = tmc->tmgroup; in tmigr_quick_check() local
1401 if (WARN_ON_ONCE(tmc->idle)) in tmigr_quick_check()
1404 if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->groupmask)) in tmigr_quick_check()
1408 if (!tmigr_check_lonely(group)) in tmigr_quick_check()
1417 nextevt = min_t(u64, nextevt, READ_ONCE(group->next_expiry)); in tmigr_quick_check()
1418 group = group->parent; in tmigr_quick_check()
1419 } while (group); in tmigr_quick_check()
1425 * tmigr_trigger_active() - trigger a CPU to become active again
1435 WARN_ON_ONCE(!tmc->online || tmc->idle); in tmigr_trigger_active()
1446 raw_spin_lock_irq(&tmc->lock); in tmigr_cpu_offline()
1447 tmc->online = false; in tmigr_cpu_offline()
1448 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_cpu_offline()
1456 raw_spin_unlock_irq(&tmc->lock); in tmigr_cpu_offline()
1471 if (WARN_ON_ONCE(!tmc->tmgroup)) in tmigr_cpu_online()
1472 return -EINVAL; in tmigr_cpu_online()
1474 raw_spin_lock_irq(&tmc->lock); in tmigr_cpu_online()
1476 tmc->idle = timer_base_is_idle(); in tmigr_cpu_online()
1477 if (!tmc->idle) in tmigr_cpu_online()
1479 tmc->online = true; in tmigr_cpu_online()
1480 raw_spin_unlock_irq(&tmc->lock); in tmigr_cpu_online()
1484 static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl, in tmigr_init_group() argument
1489 raw_spin_lock_init(&group->lock); in tmigr_init_group()
1491 group->level = lvl; in tmigr_init_group()
1492 group->numa_node = lvl < tmigr_crossnode_level ? node : NUMA_NO_NODE; in tmigr_init_group()
1494 group->num_children = 0; in tmigr_init_group()
1499 atomic_set(&group->migr_state, s.state); in tmigr_init_group()
1502 * If this is a new top-level, prepare its groupmask in advance. in tmigr_init_group()
1503 * This avoids accidents where yet another new top-level is in tmigr_init_group()
1507 group->groupmask = BIT(0); in tmigr_init_group()
1513 group->num_children = 1; in tmigr_init_group()
1516 timerqueue_init_head(&group->events); in tmigr_init_group()
1517 timerqueue_init(&group->groupevt.nextevt); in tmigr_init_group()
1518 group->groupevt.nextevt.expires = KTIME_MAX; in tmigr_init_group()
1519 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_init_group()
1520 group->groupevt.ignore = true; in tmigr_init_group()
1526 struct tmigr_group *tmp, *group = NULL; in tmigr_get_group() local
1530 /* Try to attach to an existing group first */ in tmigr_get_group()
1534 * this group belongs to the same NUMA node. in tmigr_get_group()
1536 if (lvl < tmigr_crossnode_level && tmp->numa_node != node) in tmigr_get_group()
1540 if (tmp->num_children >= TMIGR_CHILDREN_PER_GROUP) in tmigr_get_group()
1545 * siblings end up in the same group of the lowest level of the in tmigr_get_group()
1550 group = tmp; in tmigr_get_group()
1554 if (group) in tmigr_get_group()
1555 return group; in tmigr_get_group()
1557 /* Allocate and set up a new group */ in tmigr_get_group()
1558 group = kzalloc_node(sizeof(*group), GFP_KERNEL, node); in tmigr_get_group()
1559 if (!group) in tmigr_get_group()
1560 return ERR_PTR(-ENOMEM); in tmigr_get_group()
1562 tmigr_init_group(group, lvl, node); in tmigr_get_group()
1565 list_add(&group->list, &tmigr_level_list[lvl]); in tmigr_get_group()
1566 trace_tmigr_group_set(group); in tmigr_get_group()
1567 return group; in tmigr_get_group()
1576 raw_spin_lock_irq(&child->lock); in tmigr_connect_child_parent()
1577 raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING); in tmigr_connect_child_parent()
1582 * case groupmask is pre-initialized and @child already in tmigr_connect_child_parent()
1586 WARN_ON_ONCE(child->groupmask != BIT(0) || parent->num_children != 2); in tmigr_connect_child_parent()
1589 child->groupmask = BIT(parent->num_children++); in tmigr_connect_child_parent()
1597 smp_store_release(&child->parent, parent); in tmigr_connect_child_parent()
1599 raw_spin_unlock(&parent->lock); in tmigr_connect_child_parent()
1600 raw_spin_unlock_irq(&child->lock); in tmigr_connect_child_parent()
1610 * in the parent group: in tmigr_connect_child_parent()
1617 * * But if a new group above the current top level is required, it is in tmigr_connect_child_parent()
1620 * executed with the formerly top level group (child) and the newly in tmigr_connect_child_parent()
1621 * created group (parent). in tmigr_connect_child_parent()
1627 * level group. And the next time it goes inactive, it will release in tmigr_connect_child_parent()
1631 data.childmask = child->groupmask; in tmigr_connect_child_parent()
1639 WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); in tmigr_connect_child_parent()
1644 struct tmigr_group *group, *child, **stack; in tmigr_setup_groups() local
1650 return -ENOMEM; in tmigr_setup_groups()
1653 group = tmigr_get_group(cpu, node, i); in tmigr_setup_groups()
1654 if (IS_ERR(group)) { in tmigr_setup_groups()
1655 err = PTR_ERR(group); in tmigr_setup_groups()
1660 stack[i++] = group; in tmigr_setup_groups()
1668 * single group. in tmigr_setup_groups()
1670 if (group->parent || list_is_singular(&tmigr_level_list[i - 1])) in tmigr_setup_groups()
1676 WARN_ON_ONCE(!err && !group->parent && !list_is_singular(&tmigr_level_list[top])); in tmigr_setup_groups()
1679 group = stack[--i]; in tmigr_setup_groups()
1682 list_del(&group->list); in tmigr_setup_groups()
1683 kfree(group); in tmigr_setup_groups()
1687 WARN_ON_ONCE(i != group->level); in tmigr_setup_groups()
1690 * Update tmc -> group / child -> group connection in tmigr_setup_groups()
1695 raw_spin_lock_irq(&group->lock); in tmigr_setup_groups()
1697 tmc->tmgroup = group; in tmigr_setup_groups()
1698 tmc->groupmask = BIT(group->num_children++); in tmigr_setup_groups()
1700 raw_spin_unlock_irq(&group->lock); in tmigr_setup_groups()
1707 child = stack[i - 1]; in tmigr_setup_groups()
1709 tmigr_connect_child_parent(child, group, false); in tmigr_setup_groups()
1722 * CPU's child group and pre-accounted the old root. in tmigr_setup_groups()
1724 if (group->num_children == 2 && list_is_singular(lvllist)) { in tmigr_setup_groups()
1728 * it may spuriously activate the old top level group inside in tmigr_setup_groups()
1729 * the new one (regardless of whether the old top level group is in tmigr_setup_groups()
1734 lvllist = &tmigr_level_list[top - 1]; in tmigr_setup_groups()
1736 if (child->parent) in tmigr_setup_groups()
1739 tmigr_connect_child_parent(child, group, true); in tmigr_setup_groups()
1767 if (tmc->tmgroup) in tmigr_cpu_prepare()
1770 raw_spin_lock_init(&tmc->lock); in tmigr_cpu_prepare()
1771 timerqueue_init(&tmc->cpuevt.nextevt); in tmigr_cpu_prepare()
1772 tmc->cpuevt.nextevt.expires = KTIME_MAX; in tmigr_cpu_prepare()
1773 tmc->cpuevt.ignore = true; in tmigr_cpu_prepare()
1774 tmc->cpuevt.cpu = cpu; in tmigr_cpu_prepare()
1775 tmc->remote = false; in tmigr_cpu_prepare()
1776 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_cpu_prepare()
1782 if (tmc->groupmask == 0) in tmigr_cpu_prepare()
1783 return -EINVAL; in tmigr_cpu_prepare()
1793 int ret = -ENOMEM; in tmigr_init()
1824 * If a NUMA node spawns more than one CPU level group then the next in tmigr_init()
1839 pr_info("Timer migration: %d hierarchy levels; %d children per group;" in tmigr_init()