xref: /linux/kernel/time/timer_migration.h (revision 6b3f7af57881f6d6250c6dcc4d910fe8e855a607)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 #ifndef _KERNEL_TIME_MIGRATION_H
3 #define _KERNEL_TIME_MIGRATION_H
4 
5 /* Max children per group (see tmigr_group::num_children). Must be a power of 2! */
6 #define TMIGR_CHILDREN_PER_GROUP 8
7 
8 /**
9  * struct tmigr_hierarchy - a hierarchy associated with a given CPU capacity.
10  *                          Homogeneous systems have only one hierarchy.
11  *                          Heterogeneous ones have one hierarchy per CPU capacity.
12  * @cpumask:	CPUs belonging to this hierarchy
13  * @root:	The current root of the hierarchy
14  * @capacity:	CPU capacity associated with this hierarchy
15  * @node:	Node in the global hierarchy list
16  * @level_list:	Per level lists of tmigr groups (flexible array member)
17  */
18 struct tmigr_hierarchy {
19 	struct cpumask		*cpumask;
20 	struct tmigr_group	*root;
21 	unsigned long		capacity;
22 	struct list_head	node;
23 	struct list_head	level_list[];
24 };
25 
26 /**
27  * struct tmigr_event - a timer event associated with a CPU
28  * @nextevt:	The node to enqueue an event in the parent group queue
29  * @cpu:	The CPU to which this event belongs
30  * @ignore:	Hint whether the event could be ignored; it is set when
31  *		the CPU or group is active
32  */
33 struct tmigr_event {
34 	struct timerqueue_node	nextevt;
35 	unsigned int		cpu;
36 	bool			ignore;
37 };
38 
39 /**
40  * struct tmigr_group - timer migration hierarchy group
41  * @lock:		Lock protecting the event information and group hierarchy
42  *			information during setup
43  * @parent:		Pointer to the parent group. Pointer is updated when a
44  *			new hierarchy level is added because of a CPU coming
45  *			online the first time. Once it is set, the pointer will
46  *			not be removed or updated. Accessing the parent pointer
47  *			locklessly to decide whether to abort a propagation or
48  *			not is not a problem. The worst outcome is an
49  *			unnecessary/early CPU wake up. But do not access the
50  *			parent pointer several times in the same 'action' (like
51  *			activation, deactivation, check for remote expiry,...)
52  *			without holding the lock as it is not ensured that the
53  *			value will not change.
54  * @groupevt:		Next event of the group which is only used when the
55  *			group is !active. The group event is then queued into
56  *			the parent timer queue.
57  *			Ignore bit of @groupevt is set when the group is active.
58  * @next_expiry:	Base monotonic expiry time of the next event of the
59  *			group; it is used for the racy lockless check whether a
60  *			remote expiry is required; it is always reliable
61  * @events:		Timer queue for child events queued in the group
62  * @migr_state:		State of the group (see union tmigr_state)
63  * @level:		Hierarchy level of the group; Required during setup
64  * @numa_node:		Required for setup only to make sure CPU and low level
65  *			group information is NUMA local. It is set to NUMA node
66  *			as long as the group level is per NUMA node (level <
67  *			tmigr_crossnode_level); otherwise it is set to
68  *			NUMA_NO_NODE
69  * @num_children:	Counter of group children to make sure the group is only
70  *			filled with TMIGR_CHILDREN_PER_GROUP; Required for setup
71  *			only
72  * @groupmask:		mask of the group in the parent group; is set during
73  *			setup and will never change; can be read lockless
74  * @list:		List head that is added to the per level
75  *			tmigr_level_list; is required during setup when a
76  *			new group needs to be connected to the existing
77  *			hierarchy groups
78  */
79 struct tmigr_group {
80 	raw_spinlock_t		lock;
81 	struct tmigr_group	*parent;
82 	struct tmigr_event	groupevt;
83 	u64			next_expiry;
84 	struct timerqueue_head	events;
85 	atomic_t		migr_state;
86 	unsigned int		level;
87 	int			numa_node;
88 	unsigned int		num_children;
89 	u8			groupmask;
90 	struct list_head	list;
91 };
92 
93 /**
94  * struct tmigr_cpu - timer migration per CPU group
95  * @lock:		Lock protecting the tmigr_cpu group information
96  * @available:		Indicates whether the CPU is available for handling
97  *			global timers. In the deactivate path it is required to
98  *			know whether the migrator in the top level group is to
99  *			be set offline, while a timer is pending. Then another
100  *			available CPU needs to be notified to take over the
101  *			migrator role. Furthermore the information is required
102  *			in the CPU hotplug path as the CPU is able to go idle
103  *			before the timer migration hierarchy hotplug callback is
104  *			reached. During this phase, the CPU has to handle the
105  *			global timers on its own and must not act as a migrator.
106  *
107  * @idle:		Indicates whether the CPU is idle in the timer migration
108  *			hierarchy
109  * @remote:		Is set when timers of the CPU are expired remotely
110  * @tmgroup:		Pointer to the parent group
111  * @groupmask:		mask of tmigr_cpu in the parent group
112  * @wakeup:		Stores the first timer when the timer migration
113  *			hierarchy is completely idle and remote expiry was done;
114  *			is returned to timer code in the idle path and is only
115  *			used in idle path.
116  * @cpuevt:		CPU event which could be enqueued into the parent group
117  */
118 struct tmigr_cpu {
119 	raw_spinlock_t		lock;
120 	bool			available;
121 	bool			idle;
122 	bool			remote;
123 	struct tmigr_group	*tmgroup;
124 	u8			groupmask;
125 	u64			wakeup;
126 	struct tmigr_event	cpuevt;
127 };
128 
129 /**
130  * union tmigr_state - state of tmigr_group
131  * @state:	Combined version of the state - only used for atomic
132  *		read/cmpxchg function
133  * &anon struct: Split version of the state - only use the struct members to
134  *		update information to stay independent of endianness
135  * @active:	Contains each mask bit of the active children
136  * @migrator:	Contains mask of the child which is migrator
137  * @seq:	Sequence counter needs to be increased when an update
138  *		to the tmigr_state is done. It prevents a race when
139  *		updates in the child groups are propagated in changed
140  *		order. Detailed information about the scenario is
141  *		given in the documentation at the beginning of
142  *		timer_migration.c.
143  */
144 union tmigr_state {
145 	u32 state;
146 	struct {
147 		u8	active;
148 		u8	migrator;
149 		u16	seq;
150 	} __packed;
151 };
152 
153 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
154 extern void tmigr_handle_remote(void);
155 extern bool tmigr_requires_handle_remote(void);
156 extern void tmigr_cpu_activate(void);
157 extern u64 tmigr_cpu_deactivate(u64 nextevt);
158 extern u64 tmigr_cpu_new_timer(u64 nextevt);
159 extern u64 tmigr_quick_check(u64 nextevt);
160 #else /* !CONFIG_SMP || !CONFIG_NO_HZ_COMMON */
161 static inline void tmigr_handle_remote(void) { }
162 static inline bool tmigr_requires_handle_remote(void) { return false; }
163 static inline void tmigr_cpu_activate(void) { }
164 #endif /* CONFIG_SMP && CONFIG_NO_HZ_COMMON */
165 
166 #endif
167