xref: /linux/arch/s390/kernel/vtime.c (revision ad5a9e14ec8b4a868fea13a9dfa1fb38b2c35354)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *    Virtual cpu timer based timer functions.
4  *
5  *    Copyright IBM Corp. 2004, 2012
6  *    Author(s): Jan Glauber <jan.glauber@de.ibm.com>
7  */
8 
9 #include <linux/kernel_stat.h>
10 #include <linux/export.h>
11 #include <linux/kernel.h>
12 #include <linux/timex.h>
13 #include <linux/types.h>
14 #include <linux/time.h>
15 #include <asm/alternative.h>
16 #include <asm/cputime.h>
17 #include <asm/vtimer.h>
18 #include <asm/vtime.h>
19 #include <asm/cpu_mf.h>
20 #include <asm/idle.h>
21 #include <asm/smp.h>
22 
23 #include "entry.h"
24 
25 static void virt_timer_expire(void);
26 
27 static LIST_HEAD(virt_timer_list);
28 static DEFINE_SPINLOCK(virt_timer_lock);
29 static atomic64_t virt_timer_current;
30 static atomic64_t virt_timer_elapsed;
31 
32 DEFINE_PER_CPU(u64, mt_cycles[8]);
33 static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
34 static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
35 static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
36 
37 static inline void set_vtimer(u64 expires)
38 {
39 	struct lowcore *lc = get_lowcore();
40 	u64 timer;
41 
42 	asm volatile(
43 		"	stpt	%0\n"	/* Store current cpu timer value */
44 		"	spt	%1"	/* Set new value imm. afterwards */
45 		: "=Q" (timer) : "Q" (expires));
46 	lc->system_timer += lc->last_update_timer - timer;
47 	lc->last_update_timer = expires;
48 }
49 
50 static inline int virt_timer_forward(u64 elapsed)
51 {
52 	lockdep_assert_irqs_disabled();
53 	if (list_empty(&virt_timer_list))
54 		return 0;
55 	elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
56 	return elapsed >= atomic64_read(&virt_timer_current);
57 }
58 
59 static void update_mt_scaling(void)
60 {
61 	u64 cycles_new[8], *cycles_old;
62 	u64 delta, fac, mult, div;
63 	int i;
64 
65 	stcctm(MT_DIAG, smp_cpu_mtid + 1, cycles_new);
66 	cycles_old = this_cpu_ptr(mt_cycles);
67 	fac = 1;
68 	mult = div = 0;
69 	for (i = 0; i <= smp_cpu_mtid; i++) {
70 		delta = cycles_new[i] - cycles_old[i];
71 		div += delta;
72 		mult *= i + 1;
73 		mult += delta * fac;
74 		fac *= i + 1;
75 	}
76 	div *= fac;
77 	if (div > 0) {
78 		/* Update scaling factor */
79 		__this_cpu_write(mt_scaling_mult, mult);
80 		__this_cpu_write(mt_scaling_div, div);
81 		memcpy(cycles_old, cycles_new,
82 		       sizeof(u64) * (smp_cpu_mtid + 1));
83 	}
84 	__this_cpu_write(mt_scaling_jiffies, jiffies_64);
85 }
86 
87 static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new)
88 {
89 	u64 delta;
90 
91 	delta = new - *tsk_vtime;
92 	*tsk_vtime = new;
93 	return delta;
94 }
95 
96 
97 static inline u64 scale_vtime(u64 vtime)
98 {
99 	u64 mult = __this_cpu_read(mt_scaling_mult);
100 	u64 div = __this_cpu_read(mt_scaling_div);
101 
102 	if (smp_cpu_mtid)
103 		return vtime * mult / div;
104 	return vtime;
105 }
106 
107 static void account_system_index_scaled(struct task_struct *p, u64 cputime,
108 					enum cpu_usage_stat index)
109 {
110 	p->stimescaled += cputime_to_nsecs(scale_vtime(cputime));
111 	account_system_index_time(p, cputime_to_nsecs(cputime), index);
112 }
113 
114 static inline void vtime_reset_last_update(struct lowcore *lc)
115 {
116 	asm volatile(
117 		"	stpt	%0\n"	/* Store current cpu timer value */
118 		"	stckf	%1"	/* Store current tod clock value */
119 		: "=Q" (lc->last_update_timer),
120 		  "=Q" (lc->last_update_clock)
121 		: : "cc");
122 }
123 
124 /*
125  * Update process times based on virtual cpu times stored by entry.S
126  * to the lowcore fields user_timer, system_timer & steal_clock.
127  */
128 static int do_account_vtime(struct task_struct *tsk)
129 {
130 	u64 timer, clock, user, guest, system, hardirq, softirq;
131 	struct lowcore *lc = get_lowcore();
132 
133 	timer = lc->last_update_timer;
134 	clock = lc->last_update_clock;
135 
136 	vtime_reset_last_update(lc);
137 
138 	clock = lc->last_update_clock - clock;
139 	timer -= lc->last_update_timer;
140 
141 	if (hardirq_count())
142 		lc->hardirq_timer += timer;
143 	else if (in_serving_softirq())
144 		lc->softirq_timer += timer;
145 	else
146 		lc->system_timer += timer;
147 
148 	/* Update MT utilization calculation */
149 	if (smp_cpu_mtid && time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies)))
150 		update_mt_scaling();
151 
152 	/* Calculate cputime delta */
153 	user = update_tsk_timer(&tsk->thread.user_timer, lc->user_timer);
154 	guest = update_tsk_timer(&tsk->thread.guest_timer, lc->guest_timer);
155 	system = update_tsk_timer(&tsk->thread.system_timer, lc->system_timer);
156 	hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, lc->hardirq_timer);
157 	softirq = update_tsk_timer(&tsk->thread.softirq_timer, lc->softirq_timer);
158 	lc->steal_timer += clock - user - guest - system - hardirq - softirq;
159 
160 	/* Push account value */
161 	if (user) {
162 		account_user_time(tsk, cputime_to_nsecs(user));
163 		tsk->utimescaled += cputime_to_nsecs(scale_vtime(user));
164 	}
165 
166 	if (guest) {
167 		account_guest_time(tsk, cputime_to_nsecs(guest));
168 		tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest));
169 	}
170 
171 	if (system)
172 		account_system_index_scaled(tsk, system, CPUTIME_SYSTEM);
173 	if (hardirq)
174 		account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ);
175 	if (softirq)
176 		account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);
177 
178 	return virt_timer_forward(user + guest + system + hardirq + softirq);
179 }
180 
181 void vtime_task_switch(struct task_struct *prev)
182 {
183 	struct lowcore *lc = get_lowcore();
184 
185 	do_account_vtime(prev);
186 	prev->thread.user_timer = lc->user_timer;
187 	prev->thread.guest_timer = lc->guest_timer;
188 	prev->thread.system_timer = lc->system_timer;
189 	prev->thread.hardirq_timer = lc->hardirq_timer;
190 	prev->thread.softirq_timer = lc->softirq_timer;
191 	lc->user_timer = current->thread.user_timer;
192 	lc->guest_timer = current->thread.guest_timer;
193 	lc->system_timer = current->thread.system_timer;
194 	lc->hardirq_timer = current->thread.hardirq_timer;
195 	lc->softirq_timer = current->thread.softirq_timer;
196 }
197 
198 /*
199  * In s390, accounting pending user time also implies
200  * accounting system time in order to correctly compute
201  * the stolen time accounting.
202  */
203 void vtime_flush(struct task_struct *tsk)
204 {
205 	struct lowcore *lc = get_lowcore();
206 	u64 steal, avg_steal;
207 
208 	if (do_account_vtime(tsk))
209 		virt_timer_expire();
210 
211 	steal = lc->steal_timer;
212 	avg_steal = lc->avg_steal_timer;
213 	if ((s64) steal > 0) {
214 		lc->steal_timer = 0;
215 		account_steal_time(cputime_to_nsecs(steal));
216 		avg_steal += steal;
217 	}
218 	lc->avg_steal_timer = avg_steal / 2;
219 }
220 
221 static u64 vtime_delta(void)
222 {
223 	struct lowcore *lc = get_lowcore();
224 	u64 timer = lc->last_update_timer;
225 
226 	lc->last_update_timer = get_cpu_timer();
227 	return timer - lc->last_update_timer;
228 }
229 
230 void vtime_account_kernel(struct task_struct *tsk)
231 {
232 	struct lowcore *lc = get_lowcore();
233 	u64 delta = vtime_delta();
234 
235 	if (tsk->flags & PF_VCPU)
236 		lc->guest_timer += delta;
237 	else
238 		lc->system_timer += delta;
239 }
240 EXPORT_SYMBOL_GPL(vtime_account_kernel);
241 
242 void vtime_account_softirq(struct task_struct *tsk)
243 {
244 	if (!__this_cpu_read(s390_idle.idle_dyntick))
245 		get_lowcore()->softirq_timer += vtime_delta();
246 	else
247 		vtime_flush(tsk);
248 }
249 
250 void vtime_account_hardirq(struct task_struct *tsk)
251 {
252 	if (!__this_cpu_read(s390_idle.idle_dyntick)) {
253 		get_lowcore()->hardirq_timer += vtime_delta();
254 	} else {
255 		/*
256 		 * In dynticks mode, the idle cputime is accounted by the nohz
257 		 * subsystem. Therefore the s390 timer/clocks are reset on IRQ
258 		 * entry and steal time must be accounted now.
259 		 */
260 		vtime_flush(tsk);
261 	}
262 }
263 
264 #ifdef CONFIG_NO_HZ_COMMON
/**
 * vtime_reset - Fast forward vtime entry clocks
 *
 * Called from dynticks idle IRQ entry to fast-forward the clocks to current
 * time, so that the IRQ time is still accounted by vtime while nohz cputime
 * is paused. Only the lowcore last_update_timer and last_update_clock values
 * are rewritten.
 */
void vtime_reset(void)
{
	struct lowcore *lc = get_lowcore();

	vtime_reset_last_update(lc);
}
275 
276 /**
277  * vtime_dyntick_start - Inform vtime about entry to idle-dynticks
278  *
279  * Called when idle enters in dyntick mode. The idle cputime that elapsed so far
280  * is flushed and the tick subsystem takes over the idle cputime accounting.
281  */
282 void vtime_dyntick_start(void)
283 {
284 	__this_cpu_write(s390_idle.idle_dyntick, true);
285 	vtime_flush(current);
286 }
287 
288 /**
289  * vtime_dyntick_stop - Inform vtime about exit from idle-dynticks
290  *
291  * Called when idle exits from dyntick mode. The vtime entry clocks are
292  * fast-forward to current time and idle accounting resumes.
293  */
294 void vtime_dyntick_stop(void)
295 {
296 	vtime_reset_last_update(get_lowcore());
297 	__this_cpu_write(s390_idle.idle_dyntick, false);
298 }
299 #endif /* CONFIG_NO_HZ_COMMON */
300 
301 /*
302  * Sorted add to a list. List is linear searched until first bigger
303  * element is found.
304  */
305 static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
306 {
307 	struct vtimer_list *tmp;
308 
309 	list_for_each_entry(tmp, head, entry) {
310 		if (tmp->expires > timer->expires) {
311 			list_add_tail(&timer->entry, &tmp->entry);
312 			return;
313 		}
314 	}
315 	list_add_tail(&timer->entry, head);
316 }
317 
318 /*
319  * Handler for expired virtual CPU timer.
320  */
321 static void virt_timer_expire(void)
322 {
323 	struct vtimer_list *timer, *tmp;
324 	unsigned long elapsed;
325 	LIST_HEAD(cb_list);
326 
327 	/* walk timer list, fire all expired timers */
328 	spin_lock(&virt_timer_lock);
329 	elapsed = atomic64_read(&virt_timer_elapsed);
330 	list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) {
331 		if (timer->expires < elapsed)
332 			/* move expired timer to the callback queue */
333 			list_move_tail(&timer->entry, &cb_list);
334 		else
335 			timer->expires -= elapsed;
336 	}
337 	if (!list_empty(&virt_timer_list)) {
338 		timer = list_first_entry(&virt_timer_list,
339 					 struct vtimer_list, entry);
340 		atomic64_set(&virt_timer_current, timer->expires);
341 	}
342 	atomic64_sub(elapsed, &virt_timer_elapsed);
343 	spin_unlock(&virt_timer_lock);
344 
345 	/* Do callbacks and recharge periodic timers */
346 	list_for_each_entry_safe(timer, tmp, &cb_list, entry) {
347 		list_del_init(&timer->entry);
348 		timer->function(timer->data);
349 		if (timer->interval) {
350 			/* Recharge interval timer */
351 			timer->expires = timer->interval +
352 				atomic64_read(&virt_timer_elapsed);
353 			spin_lock(&virt_timer_lock);
354 			list_add_sorted(timer, &virt_timer_list);
355 			spin_unlock(&virt_timer_lock);
356 		}
357 	}
358 }
359 
360 void init_virt_timer(struct vtimer_list *timer)
361 {
362 	timer->function = NULL;
363 	INIT_LIST_HEAD(&timer->entry);
364 }
365 EXPORT_SYMBOL(init_virt_timer);
366 
367 static inline int vtimer_pending(struct vtimer_list *timer)
368 {
369 	return !list_empty(&timer->entry);
370 }
371 
372 static void internal_add_vtimer(struct vtimer_list *timer)
373 {
374 	if (list_empty(&virt_timer_list)) {
375 		/* First timer, just program it. */
376 		atomic64_set(&virt_timer_current, timer->expires);
377 		atomic64_set(&virt_timer_elapsed, 0);
378 		list_add(&timer->entry, &virt_timer_list);
379 	} else {
380 		/* Update timer against current base. */
381 		timer->expires += atomic64_read(&virt_timer_elapsed);
382 		if (likely((s64) timer->expires <
383 			   (s64) atomic64_read(&virt_timer_current)))
384 			/* The new timer expires before the current timer. */
385 			atomic64_set(&virt_timer_current, timer->expires);
386 		/* Insert new timer into the list. */
387 		list_add_sorted(timer, &virt_timer_list);
388 	}
389 }
390 
391 static void __add_vtimer(struct vtimer_list *timer, int periodic)
392 {
393 	unsigned long flags;
394 
395 	timer->interval = periodic ? timer->expires : 0;
396 	spin_lock_irqsave(&virt_timer_lock, flags);
397 	internal_add_vtimer(timer);
398 	spin_unlock_irqrestore(&virt_timer_lock, flags);
399 }
400 
/*
 * add_virt_timer - add a oneshot virtual CPU timer
 *
 * The timer is queued with its interval cleared.
 */
void add_virt_timer(struct vtimer_list *timer)
{
	const int periodic = 0;

	__add_vtimer(timer, periodic);
}
EXPORT_SYMBOL(add_virt_timer);
409 
/*
 * add_virt_timer_periodic - add an interval virtual CPU timer
 *
 * The timer is queued with its interval set to timer->expires.
 */
void add_virt_timer_periodic(struct vtimer_list *timer)
{
	const int periodic = 1;

	__add_vtimer(timer, periodic);
}
EXPORT_SYMBOL(add_virt_timer_periodic);
418 
419 static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic)
420 {
421 	unsigned long flags;
422 	int rc;
423 
424 	BUG_ON(!timer->function);
425 
426 	if (timer->expires == expires && vtimer_pending(timer))
427 		return 1;
428 	spin_lock_irqsave(&virt_timer_lock, flags);
429 	rc = vtimer_pending(timer);
430 	if (rc)
431 		list_del_init(&timer->entry);
432 	timer->interval = periodic ? expires : 0;
433 	timer->expires = expires;
434 	internal_add_vtimer(timer);
435 	spin_unlock_irqrestore(&virt_timer_lock, flags);
436 	return rc;
437 }
438 
439 /*
440  * returns whether it has modified a pending timer (1) or not (0)
441  */
442 int mod_virt_timer(struct vtimer_list *timer, u64 expires)
443 {
444 	return __mod_vtimer(timer, expires, 0);
445 }
446 EXPORT_SYMBOL(mod_virt_timer);
447 
448 /*
449  * returns whether it has modified a pending timer (1) or not (0)
450  */
451 int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires)
452 {
453 	return __mod_vtimer(timer, expires, 1);
454 }
455 EXPORT_SYMBOL(mod_virt_timer_periodic);
456 
457 /*
458  * Delete a virtual timer.
459  *
460  * returns whether the deleted timer was pending (1) or not (0)
461  */
462 int del_virt_timer(struct vtimer_list *timer)
463 {
464 	unsigned long flags;
465 
466 	if (!vtimer_pending(timer))
467 		return 0;
468 	spin_lock_irqsave(&virt_timer_lock, flags);
469 	list_del_init(&timer->entry);
470 	spin_unlock_irqrestore(&virt_timer_lock, flags);
471 	return 1;
472 }
473 EXPORT_SYMBOL(del_virt_timer);
474 
475 /*
476  * Start the virtual CPU timer on the current CPU.
477  */
478 void vtime_init(void)
479 {
480 	/* set initial cpu timer */
481 	set_vtimer(VTIMER_MAX_SLICE);
482 	/* Setup initial MT scaling values */
483 	if (smp_cpu_mtid) {
484 		__this_cpu_write(mt_scaling_jiffies, jiffies);
485 		__this_cpu_write(mt_scaling_mult, 1);
486 		__this_cpu_write(mt_scaling_div, 1);
487 		stcctm(MT_DIAG, smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
488 	}
489 }
490