1*b133207dSGabriele Monaco // SPDX-License-Identifier: GPL-2.0 2*b133207dSGabriele Monaco #include <linux/ftrace.h> 3*b133207dSGabriele Monaco #include <linux/tracepoint.h> 4*b133207dSGabriele Monaco #include <linux/kernel.h> 5*b133207dSGabriele Monaco #include <linux/module.h> 6*b133207dSGabriele Monaco #include <linux/init.h> 7*b133207dSGabriele Monaco #include <linux/rv.h> 8*b133207dSGabriele Monaco #include <rv/instrumentation.h> 9*b133207dSGabriele Monaco 10*b133207dSGabriele Monaco #define MODULE_NAME "nomiss" 11*b133207dSGabriele Monaco 12*b133207dSGabriele Monaco #include <uapi/linux/sched/types.h> 13*b133207dSGabriele Monaco #include <trace/events/syscalls.h> 14*b133207dSGabriele Monaco #include <trace/events/sched.h> 15*b133207dSGabriele Monaco #include <trace/events/task.h> 16*b133207dSGabriele Monaco #include <rv_trace.h> 17*b133207dSGabriele Monaco 18*b133207dSGabriele Monaco #define RV_MON_TYPE RV_MON_PER_OBJ 19*b133207dSGabriele Monaco #define HA_TIMER_TYPE HA_TIMER_WHEEL 20*b133207dSGabriele Monaco /* The start condition is on sched_switch, it's dangerous to allocate there */ 21*b133207dSGabriele Monaco #define DA_SKIP_AUTO_ALLOC 22*b133207dSGabriele Monaco typedef struct sched_dl_entity *monitor_target; 23*b133207dSGabriele Monaco #include "nomiss.h" 24*b133207dSGabriele Monaco #include <rv/ha_monitor.h> 25*b133207dSGabriele Monaco #include <monitors/deadline/deadline.h> 26*b133207dSGabriele Monaco 27*b133207dSGabriele Monaco /* 28*b133207dSGabriele Monaco * User configurable deadline threshold. If the total utilisation of deadline 29*b133207dSGabriele Monaco * tasks is larger than 1, they are only guaranteed bounded tardiness. See 30*b133207dSGabriele Monaco * Documentation/scheduler/sched-deadline.rst for more details. 31*b133207dSGabriele Monaco * The minimum tardiness without sched_feat(HRTICK_DL) is 1 tick to accommodate 32*b133207dSGabriele Monaco * for throttle enforced on the next tick. 33*b133207dSGabriele Monaco */ 34*b133207dSGabriele Monaco static u64 deadline_thresh = TICK_NSEC; 35*b133207dSGabriele Monaco module_param(deadline_thresh, ullong, 0644); 36*b133207dSGabriele Monaco #define DEADLINE_NS(ha_mon) (ha_get_target(ha_mon)->dl_deadline + deadline_thresh) 37*b133207dSGabriele Monaco 38*b133207dSGabriele Monaco static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_nomiss env, u64 time_ns) 39*b133207dSGabriele Monaco { 40*b133207dSGabriele Monaco if (env == clk_nomiss) 41*b133207dSGabriele Monaco return ha_get_clk_ns(ha_mon, env, time_ns); 42*b133207dSGabriele Monaco else if (env == is_constr_dl_nomiss) 43*b133207dSGabriele Monaco return !dl_is_implicit(ha_get_target(ha_mon)); 44*b133207dSGabriele Monaco else if (env == is_defer_nomiss) 45*b133207dSGabriele Monaco return ha_get_target(ha_mon)->dl_defer; 46*b133207dSGabriele Monaco return ENV_INVALID_VALUE; 47*b133207dSGabriele Monaco } 48*b133207dSGabriele Monaco 49*b133207dSGabriele Monaco static void ha_reset_env(struct ha_monitor *ha_mon, enum envs_nomiss env, u64 time_ns) 50*b133207dSGabriele Monaco { 51*b133207dSGabriele Monaco if (env == clk_nomiss) 52*b133207dSGabriele Monaco ha_reset_clk_ns(ha_mon, env, time_ns); 53*b133207dSGabriele Monaco } 54*b133207dSGabriele Monaco 55*b133207dSGabriele Monaco static inline bool ha_verify_invariants(struct ha_monitor *ha_mon, 56*b133207dSGabriele Monaco enum states curr_state, enum events event, 57*b133207dSGabriele Monaco enum states next_state, u64 time_ns) 58*b133207dSGabriele Monaco { 59*b133207dSGabriele Monaco if (curr_state == ready_nomiss) 60*b133207dSGabriele Monaco return ha_check_invariant_ns(ha_mon, clk_nomiss, time_ns); 61*b133207dSGabriele Monaco else if (curr_state == running_nomiss) 62*b133207dSGabriele Monaco return ha_check_invariant_ns(ha_mon, clk_nomiss, time_ns); 63*b133207dSGabriele Monaco return true; 64*b133207dSGabriele Monaco } 65*b133207dSGabriele Monaco 66*b133207dSGabriele Monaco static inline void ha_convert_inv_guard(struct ha_monitor *ha_mon, 67*b133207dSGabriele Monaco enum states curr_state, enum events event, 68*b133207dSGabriele Monaco enum states next_state, u64 time_ns) 69*b133207dSGabriele Monaco { 70*b133207dSGabriele Monaco if (curr_state == next_state) 71*b133207dSGabriele Monaco return; 72*b133207dSGabriele Monaco if (curr_state == ready_nomiss) 73*b133207dSGabriele Monaco ha_inv_to_guard(ha_mon, clk_nomiss, DEADLINE_NS(ha_mon), time_ns); 74*b133207dSGabriele Monaco else if (curr_state == running_nomiss) 75*b133207dSGabriele Monaco ha_inv_to_guard(ha_mon, clk_nomiss, DEADLINE_NS(ha_mon), time_ns); 76*b133207dSGabriele Monaco } 77*b133207dSGabriele Monaco 78*b133207dSGabriele Monaco static inline bool ha_verify_guards(struct ha_monitor *ha_mon, 79*b133207dSGabriele Monaco enum states curr_state, enum events event, 80*b133207dSGabriele Monaco enum states next_state, u64 time_ns) 81*b133207dSGabriele Monaco { 82*b133207dSGabriele Monaco bool res = true; 83*b133207dSGabriele Monaco 84*b133207dSGabriele Monaco if (curr_state == ready_nomiss && event == dl_replenish_nomiss) 85*b133207dSGabriele Monaco ha_reset_env(ha_mon, clk_nomiss, time_ns); 86*b133207dSGabriele Monaco else if (curr_state == ready_nomiss && event == dl_throttle_nomiss) 87*b133207dSGabriele Monaco res = ha_get_env(ha_mon, is_defer_nomiss, time_ns) == 1ull; 88*b133207dSGabriele Monaco else if (curr_state == idle_nomiss && event == dl_replenish_nomiss) 89*b133207dSGabriele Monaco ha_reset_env(ha_mon, clk_nomiss, time_ns); 90*b133207dSGabriele Monaco else if (curr_state == running_nomiss && event == dl_replenish_nomiss) 91*b133207dSGabriele Monaco ha_reset_env(ha_mon, clk_nomiss, time_ns); 92*b133207dSGabriele Monaco else if (curr_state == sleeping_nomiss && event == dl_replenish_nomiss) 93*b133207dSGabriele Monaco ha_reset_env(ha_mon, clk_nomiss, time_ns); 94*b133207dSGabriele Monaco else if (curr_state == sleeping_nomiss && event == dl_throttle_nomiss) 95*b133207dSGabriele Monaco res = ha_get_env(ha_mon, is_constr_dl_nomiss, time_ns) == 1ull || 96*b133207dSGabriele Monaco ha_get_env(ha_mon, is_defer_nomiss, time_ns) == 1ull; 97*b133207dSGabriele Monaco else if (curr_state == throttled_nomiss && event == dl_replenish_nomiss) 98*b133207dSGabriele Monaco ha_reset_env(ha_mon, clk_nomiss, time_ns); 99*b133207dSGabriele Monaco return res; 100*b133207dSGabriele Monaco } 101*b133207dSGabriele Monaco 102*b133207dSGabriele Monaco static inline void ha_setup_invariants(struct ha_monitor *ha_mon, 103*b133207dSGabriele Monaco enum states curr_state, enum events event, 104*b133207dSGabriele Monaco enum states next_state, u64 time_ns) 105*b133207dSGabriele Monaco { 106*b133207dSGabriele Monaco if (next_state == curr_state && event != dl_replenish_nomiss) 107*b133207dSGabriele Monaco return; 108*b133207dSGabriele Monaco if (next_state == ready_nomiss) 109*b133207dSGabriele Monaco ha_start_timer_ns(ha_mon, clk_nomiss, DEADLINE_NS(ha_mon), time_ns); 110*b133207dSGabriele Monaco else if (next_state == running_nomiss) 111*b133207dSGabriele Monaco ha_start_timer_ns(ha_mon, clk_nomiss, DEADLINE_NS(ha_mon), time_ns); 112*b133207dSGabriele Monaco else if (curr_state == ready_nomiss) 113*b133207dSGabriele Monaco ha_cancel_timer(ha_mon); 114*b133207dSGabriele Monaco else if (curr_state == running_nomiss) 115*b133207dSGabriele Monaco ha_cancel_timer(ha_mon); 116*b133207dSGabriele Monaco } 117*b133207dSGabriele Monaco 118*b133207dSGabriele Monaco static bool ha_verify_constraint(struct ha_monitor *ha_mon, 119*b133207dSGabriele Monaco enum states curr_state, enum events event, 120*b133207dSGabriele Monaco enum states next_state, u64 time_ns) 121*b133207dSGabriele Monaco { 122*b133207dSGabriele Monaco if (!ha_verify_invariants(ha_mon, curr_state, event, next_state, time_ns)) 123*b133207dSGabriele Monaco return false; 124*b133207dSGabriele Monaco 125*b133207dSGabriele Monaco ha_convert_inv_guard(ha_mon, curr_state, event, next_state, time_ns); 126*b133207dSGabriele Monaco 127*b133207dSGabriele Monaco if (!ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns)) 128*b133207dSGabriele Monaco return false; 129*b133207dSGabriele Monaco 130*b133207dSGabriele Monaco ha_setup_invariants(ha_mon, curr_state, event, next_state, time_ns); 131*b133207dSGabriele Monaco 132*b133207dSGabriele Monaco return true; 133*b133207dSGabriele Monaco } 134*b133207dSGabriele Monaco 135*b133207dSGabriele Monaco static void handle_dl_replenish(void *data, struct sched_dl_entity *dl_se, 136*b133207dSGabriele Monaco int cpu, u8 type) 137*b133207dSGabriele Monaco { 138*b133207dSGabriele Monaco if (is_supported_type(type)) 139*b133207dSGabriele Monaco da_handle_event(EXPAND_ID(dl_se, cpu, type), dl_replenish_nomiss); 140*b133207dSGabriele Monaco } 141*b133207dSGabriele Monaco 142*b133207dSGabriele Monaco static void handle_dl_throttle(void *data, struct sched_dl_entity *dl_se, 143*b133207dSGabriele Monaco int cpu, u8 type) 144*b133207dSGabriele Monaco { 145*b133207dSGabriele Monaco if (is_supported_type(type)) 146*b133207dSGabriele Monaco da_handle_event(EXPAND_ID(dl_se, cpu, type), dl_throttle_nomiss); 147*b133207dSGabriele Monaco } 148*b133207dSGabriele Monaco 149*b133207dSGabriele Monaco static void handle_dl_server_stop(void *data, struct sched_dl_entity *dl_se, 150*b133207dSGabriele Monaco int cpu, u8 type) 151*b133207dSGabriele Monaco { 152*b133207dSGabriele Monaco /* 153*b133207dSGabriele Monaco * This isn't the standard use of da_handle_start_run_event since this 154*b133207dSGabriele Monaco * event cannot only occur from the initial state. 155*b133207dSGabriele Monaco * It is fine to use here because it always brings to a known state and 156*b133207dSGabriele Monaco * the fact we "pretend" the transition starts from the initial state 157*b133207dSGabriele Monaco * has no side effect. 158*b133207dSGabriele Monaco */ 159*b133207dSGabriele Monaco if (is_supported_type(type)) 160*b133207dSGabriele Monaco da_handle_start_run_event(EXPAND_ID(dl_se, cpu, type), dl_server_stop_nomiss); 161*b133207dSGabriele Monaco } 162*b133207dSGabriele Monaco 163*b133207dSGabriele Monaco static inline void handle_server_switch(struct task_struct *next, int cpu, u8 type) 164*b133207dSGabriele Monaco { 165*b133207dSGabriele Monaco struct sched_dl_entity *dl_se = get_server(next, type); 166*b133207dSGabriele Monaco 167*b133207dSGabriele Monaco if (dl_se && is_idle_task(next)) 168*b133207dSGabriele Monaco da_handle_event(EXPAND_ID(dl_se, cpu, type), dl_server_idle_nomiss); 169*b133207dSGabriele Monaco } 170*b133207dSGabriele Monaco 171*b133207dSGabriele Monaco static void handle_sched_switch(void *data, bool preempt, 172*b133207dSGabriele Monaco struct task_struct *prev, 173*b133207dSGabriele Monaco struct task_struct *next, 174*b133207dSGabriele Monaco unsigned int prev_state) 175*b133207dSGabriele Monaco { 176*b133207dSGabriele Monaco int cpu = task_cpu(next); 177*b133207dSGabriele Monaco 178*b133207dSGabriele Monaco if (prev_state != TASK_RUNNING && !preempt && prev->policy == SCHED_DEADLINE) 179*b133207dSGabriele Monaco da_handle_event(EXPAND_ID_TASK(prev), sched_switch_suspend_nomiss); 180*b133207dSGabriele Monaco if (next->policy == SCHED_DEADLINE) 181*b133207dSGabriele Monaco da_handle_start_run_event(EXPAND_ID_TASK(next), sched_switch_in_nomiss); 182*b133207dSGabriele Monaco 183*b133207dSGabriele Monaco /* 184*b133207dSGabriele Monaco * The server is available in next only if the next task is boosted, 185*b133207dSGabriele Monaco * otherwise we need to retrieve it. 186*b133207dSGabriele Monaco * Here the server continues in the state running/armed until actually 187*b133207dSGabriele Monaco * stopped, this works since we continue expecting a throttle. 188*b133207dSGabriele Monaco */ 189*b133207dSGabriele Monaco if (next->dl_server) 190*b133207dSGabriele Monaco da_handle_start_event(EXPAND_ID(next->dl_server, cpu, 191*b133207dSGabriele Monaco get_server_type(next)), 192*b133207dSGabriele Monaco sched_switch_in_nomiss); 193*b133207dSGabriele Monaco else { 194*b133207dSGabriele Monaco handle_server_switch(next, cpu, DL_SERVER_FAIR); 195*b133207dSGabriele Monaco if (IS_ENABLED(CONFIG_SCHED_CLASS_EXT)) 196*b133207dSGabriele Monaco handle_server_switch(next, cpu, DL_SERVER_EXT); 197*b133207dSGabriele Monaco } 198*b133207dSGabriele Monaco } 199*b133207dSGabriele Monaco 200*b133207dSGabriele Monaco static void handle_sys_enter(void *data, struct pt_regs *regs, long id) 201*b133207dSGabriele Monaco { 202*b133207dSGabriele Monaco struct task_struct *p; 203*b133207dSGabriele Monaco int new_policy = -1; 204*b133207dSGabriele Monaco pid_t pid = 0; 205*b133207dSGabriele Monaco 206*b133207dSGabriele Monaco new_policy = extract_params(regs, id, &pid); 207*b133207dSGabriele Monaco if (new_policy < 0) 208*b133207dSGabriele Monaco return; 209*b133207dSGabriele Monaco guard(rcu)(); 210*b133207dSGabriele Monaco p = pid ? find_task_by_vpid(pid) : current; 211*b133207dSGabriele Monaco if (unlikely(!p) || new_policy == p->policy) 212*b133207dSGabriele Monaco return; 213*b133207dSGabriele Monaco 214*b133207dSGabriele Monaco if (p->policy == SCHED_DEADLINE) 215*b133207dSGabriele Monaco da_reset(EXPAND_ID_TASK(p)); 216*b133207dSGabriele Monaco else if (new_policy == SCHED_DEADLINE) 217*b133207dSGabriele Monaco da_create_or_get(EXPAND_ID_TASK(p)); 218*b133207dSGabriele Monaco } 219*b133207dSGabriele Monaco 220*b133207dSGabriele Monaco static void handle_sched_wakeup(void *data, struct task_struct *tsk) 221*b133207dSGabriele Monaco { 222*b133207dSGabriele Monaco if (tsk->policy == SCHED_DEADLINE) 223*b133207dSGabriele Monaco da_handle_event(EXPAND_ID_TASK(tsk), sched_wakeup_nomiss); 224*b133207dSGabriele Monaco } 225*b133207dSGabriele Monaco 226*b133207dSGabriele Monaco static int enable_nomiss(void) 227*b133207dSGabriele Monaco { 228*b133207dSGabriele Monaco int retval; 229*b133207dSGabriele Monaco 230*b133207dSGabriele Monaco retval = da_monitor_init(); 231*b133207dSGabriele Monaco if (retval) 232*b133207dSGabriele Monaco return retval; 233*b133207dSGabriele Monaco 234*b133207dSGabriele Monaco retval = init_storage(false); 235*b133207dSGabriele Monaco if (retval) 236*b133207dSGabriele Monaco return retval; 237*b133207dSGabriele Monaco rv_attach_trace_probe("nomiss", sched_dl_replenish_tp, handle_dl_replenish); 238*b133207dSGabriele Monaco rv_attach_trace_probe("nomiss", sched_dl_throttle_tp, handle_dl_throttle); 239*b133207dSGabriele Monaco rv_attach_trace_probe("nomiss", sched_dl_server_stop_tp, handle_dl_server_stop); 240*b133207dSGabriele Monaco rv_attach_trace_probe("nomiss", sched_switch, handle_sched_switch); 241*b133207dSGabriele Monaco rv_attach_trace_probe("nomiss", sched_wakeup, handle_sched_wakeup); 242*b133207dSGabriele Monaco if (!should_skip_syscall_handle()) 243*b133207dSGabriele Monaco rv_attach_trace_probe("nomiss", sys_enter, handle_sys_enter); 244*b133207dSGabriele Monaco rv_attach_trace_probe("nomiss", task_newtask, handle_newtask); 245*b133207dSGabriele Monaco rv_attach_trace_probe("nomiss", sched_process_exit, handle_exit); 246*b133207dSGabriele Monaco 247*b133207dSGabriele Monaco return 0; 248*b133207dSGabriele Monaco } 249*b133207dSGabriele Monaco 250*b133207dSGabriele Monaco static void disable_nomiss(void) 251*b133207dSGabriele Monaco { 252*b133207dSGabriele Monaco rv_this.enabled = 0; 253*b133207dSGabriele Monaco 254*b133207dSGabriele Monaco /* Those are RCU writers, detach earlier hoping to close a bit faster */ 255*b133207dSGabriele Monaco rv_detach_trace_probe("nomiss", task_newtask, handle_newtask); 256*b133207dSGabriele Monaco rv_detach_trace_probe("nomiss", sched_process_exit, handle_exit); 257*b133207dSGabriele Monaco if (!should_skip_syscall_handle()) 258*b133207dSGabriele Monaco rv_detach_trace_probe("nomiss", sys_enter, handle_sys_enter); 259*b133207dSGabriele Monaco 260*b133207dSGabriele Monaco rv_detach_trace_probe("nomiss", sched_dl_replenish_tp, handle_dl_replenish); 261*b133207dSGabriele Monaco rv_detach_trace_probe("nomiss", sched_dl_throttle_tp, handle_dl_throttle); 262*b133207dSGabriele Monaco rv_detach_trace_probe("nomiss", sched_dl_server_stop_tp, handle_dl_server_stop); 263*b133207dSGabriele Monaco rv_detach_trace_probe("nomiss", sched_switch, handle_sched_switch); 264*b133207dSGabriele Monaco rv_detach_trace_probe("nomiss", sched_wakeup, handle_sched_wakeup); 265*b133207dSGabriele Monaco 266*b133207dSGabriele Monaco da_monitor_destroy(); 267*b133207dSGabriele Monaco } 268*b133207dSGabriele Monaco 269*b133207dSGabriele Monaco static struct rv_monitor rv_this = { 270*b133207dSGabriele Monaco .name = "nomiss", 271*b133207dSGabriele Monaco .description = "dl entities run to completion before their deadline.", 272*b133207dSGabriele Monaco .enable = enable_nomiss, 273*b133207dSGabriele Monaco .disable = disable_nomiss, 274*b133207dSGabriele Monaco .reset = da_monitor_reset_all, 275*b133207dSGabriele Monaco .enabled = 0, 276*b133207dSGabriele Monaco }; 277*b133207dSGabriele Monaco 278*b133207dSGabriele Monaco static int __init register_nomiss(void) 279*b133207dSGabriele Monaco { 280*b133207dSGabriele Monaco return rv_register_monitor(&rv_this, &rv_deadline); 281*b133207dSGabriele Monaco } 282*b133207dSGabriele Monaco 283*b133207dSGabriele Monaco static void __exit unregister_nomiss(void) 284*b133207dSGabriele Monaco { 285*b133207dSGabriele Monaco rv_unregister_monitor(&rv_this); 286*b133207dSGabriele Monaco } 287*b133207dSGabriele Monaco 288*b133207dSGabriele Monaco module_init(register_nomiss); 289*b133207dSGabriele Monaco module_exit(unregister_nomiss); 290*b133207dSGabriele Monaco 291*b133207dSGabriele Monaco MODULE_LICENSE("GPL"); 292*b133207dSGabriele Monaco MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); 293*b133207dSGabriele Monaco MODULE_DESCRIPTION("nomiss: dl entities run to completion before their deadline."); 294