// SPDX-License-Identifier: GPL-2.0
/*
 * Tracepoint support for mmap_lock: instantiates the tracepoints declared in
 * trace/events/mmap_lock.h (CREATE_TRACE_POINTS must appear in exactly one
 * translation unit, before the trace header is included).
 */
#define CREATE_TRACE_POINTS
#include <trace/events/mmap_lock.h>

#include <linux/mm.h>
#include <linux/cgroup.h>
#include <linux/memcontrol.h>
#include <linux/mmap_lock.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/trace_events.h>
#include <linux/local_lock.h>

/* Exported so modules taking mmap_lock can fire these tracepoints too. */
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);

#ifdef CONFIG_MEMCG

/*
 * Our various events all share the same buffer (because we don't want or need
 * to allocate a set of buffers *per event type*), so we need to protect against
 * concurrent _reg() and _unreg() calls, and count how many _reg() calls have
 * been made.
 */
static DEFINE_MUTEX(reg_lock);
static int reg_refcount; /* Protected by reg_lock. */

/*
 * Size of the buffer for memcg path names. Ignoring stack trace support,
 * trace_events_hist.c uses MAX_FILTER_STR_VAL for this, so we also use it.
342b5067a8SAxel Rasmussen */ 352b5067a8SAxel Rasmussen #define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL 362b5067a8SAxel Rasmussen 372b5067a8SAxel Rasmussen /* 382b5067a8SAxel Rasmussen * How many contexts our trace events might be called in: normal, softirq, irq, 392b5067a8SAxel Rasmussen * and NMI. 402b5067a8SAxel Rasmussen */ 412b5067a8SAxel Rasmussen #define CONTEXT_COUNT 4 422b5067a8SAxel Rasmussen 43832b5072SNicolas Saenz Julienne struct memcg_path { 44832b5072SNicolas Saenz Julienne local_lock_t lock; 45832b5072SNicolas Saenz Julienne char __rcu *buf; 46832b5072SNicolas Saenz Julienne local_t buf_idx; 47832b5072SNicolas Saenz Julienne }; 48832b5072SNicolas Saenz Julienne static DEFINE_PER_CPU(struct memcg_path, memcg_paths) = { 49832b5072SNicolas Saenz Julienne .lock = INIT_LOCAL_LOCK(lock), 50832b5072SNicolas Saenz Julienne .buf_idx = LOCAL_INIT(0), 51832b5072SNicolas Saenz Julienne }; 52832b5072SNicolas Saenz Julienne 532b5067a8SAxel Rasmussen static char **tmp_bufs; 542b5067a8SAxel Rasmussen 552b5067a8SAxel Rasmussen /* Called with reg_lock held. */ 562b5067a8SAxel Rasmussen static void free_memcg_path_bufs(void) 572b5067a8SAxel Rasmussen { 58832b5072SNicolas Saenz Julienne struct memcg_path *memcg_path; 592b5067a8SAxel Rasmussen int cpu; 602b5067a8SAxel Rasmussen char **old = tmp_bufs; 612b5067a8SAxel Rasmussen 622b5067a8SAxel Rasmussen for_each_possible_cpu(cpu) { 63832b5072SNicolas Saenz Julienne memcg_path = per_cpu_ptr(&memcg_paths, cpu); 64832b5072SNicolas Saenz Julienne *(old++) = rcu_dereference_protected(memcg_path->buf, 652b5067a8SAxel Rasmussen lockdep_is_held(®_lock)); 66832b5072SNicolas Saenz Julienne rcu_assign_pointer(memcg_path->buf, NULL); 672b5067a8SAxel Rasmussen } 682b5067a8SAxel Rasmussen 692b5067a8SAxel Rasmussen /* Wait for inflight memcg_path_buf users to finish. 
*/ 702b5067a8SAxel Rasmussen synchronize_rcu(); 712b5067a8SAxel Rasmussen 722b5067a8SAxel Rasmussen old = tmp_bufs; 732b5067a8SAxel Rasmussen for_each_possible_cpu(cpu) { 742b5067a8SAxel Rasmussen kfree(*(old++)); 752b5067a8SAxel Rasmussen } 762b5067a8SAxel Rasmussen 772b5067a8SAxel Rasmussen kfree(tmp_bufs); 782b5067a8SAxel Rasmussen tmp_bufs = NULL; 792b5067a8SAxel Rasmussen } 802b5067a8SAxel Rasmussen 812b5067a8SAxel Rasmussen int trace_mmap_lock_reg(void) 822b5067a8SAxel Rasmussen { 832b5067a8SAxel Rasmussen int cpu; 842b5067a8SAxel Rasmussen char *new; 852b5067a8SAxel Rasmussen 862b5067a8SAxel Rasmussen mutex_lock(®_lock); 872b5067a8SAxel Rasmussen 882b5067a8SAxel Rasmussen /* If the refcount is going 0->1, proceed with allocating buffers. */ 892b5067a8SAxel Rasmussen if (reg_refcount++) 902b5067a8SAxel Rasmussen goto out; 912b5067a8SAxel Rasmussen 922b5067a8SAxel Rasmussen tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs), 932b5067a8SAxel Rasmussen GFP_KERNEL); 942b5067a8SAxel Rasmussen if (tmp_bufs == NULL) 952b5067a8SAxel Rasmussen goto out_fail; 962b5067a8SAxel Rasmussen 972b5067a8SAxel Rasmussen for_each_possible_cpu(cpu) { 982b5067a8SAxel Rasmussen new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL); 992b5067a8SAxel Rasmussen if (new == NULL) 1002b5067a8SAxel Rasmussen goto out_fail_free; 101832b5072SNicolas Saenz Julienne rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new); 1022b5067a8SAxel Rasmussen /* Don't need to wait for inflights, they'd have gotten NULL. */ 1032b5067a8SAxel Rasmussen } 1042b5067a8SAxel Rasmussen 1052b5067a8SAxel Rasmussen out: 1062b5067a8SAxel Rasmussen mutex_unlock(®_lock); 1072b5067a8SAxel Rasmussen return 0; 1082b5067a8SAxel Rasmussen 1092b5067a8SAxel Rasmussen out_fail_free: 1102b5067a8SAxel Rasmussen free_memcg_path_bufs(); 1112b5067a8SAxel Rasmussen out_fail: 1122b5067a8SAxel Rasmussen /* Since we failed, undo the earlier ref increment. 
*/ 1132b5067a8SAxel Rasmussen --reg_refcount; 1142b5067a8SAxel Rasmussen 1152b5067a8SAxel Rasmussen mutex_unlock(®_lock); 1162b5067a8SAxel Rasmussen return -ENOMEM; 1172b5067a8SAxel Rasmussen } 1182b5067a8SAxel Rasmussen 1192b5067a8SAxel Rasmussen void trace_mmap_lock_unreg(void) 1202b5067a8SAxel Rasmussen { 1212b5067a8SAxel Rasmussen mutex_lock(®_lock); 1222b5067a8SAxel Rasmussen 1232b5067a8SAxel Rasmussen /* If the refcount is going 1->0, proceed with freeing buffers. */ 1242b5067a8SAxel Rasmussen if (--reg_refcount) 1252b5067a8SAxel Rasmussen goto out; 1262b5067a8SAxel Rasmussen 1272b5067a8SAxel Rasmussen free_memcg_path_bufs(); 1282b5067a8SAxel Rasmussen 1292b5067a8SAxel Rasmussen out: 1302b5067a8SAxel Rasmussen mutex_unlock(®_lock); 1312b5067a8SAxel Rasmussen } 1322b5067a8SAxel Rasmussen 1332b5067a8SAxel Rasmussen static inline char *get_memcg_path_buf(void) 1342b5067a8SAxel Rasmussen { 135832b5072SNicolas Saenz Julienne struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths); 1362b5067a8SAxel Rasmussen char *buf; 1372b5067a8SAxel Rasmussen int idx; 1382b5067a8SAxel Rasmussen 1392b5067a8SAxel Rasmussen rcu_read_lock(); 140832b5072SNicolas Saenz Julienne buf = rcu_dereference(memcg_path->buf); 1412b5067a8SAxel Rasmussen if (buf == NULL) { 1422b5067a8SAxel Rasmussen rcu_read_unlock(); 1432b5067a8SAxel Rasmussen return NULL; 1442b5067a8SAxel Rasmussen } 145832b5072SNicolas Saenz Julienne idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) - 1462b5067a8SAxel Rasmussen MEMCG_PATH_BUF_SIZE; 1472b5067a8SAxel Rasmussen return &buf[idx]; 1482b5067a8SAxel Rasmussen } 1492b5067a8SAxel Rasmussen 1502b5067a8SAxel Rasmussen static inline void put_memcg_path_buf(void) 1512b5067a8SAxel Rasmussen { 152832b5072SNicolas Saenz Julienne local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx); 1532b5067a8SAxel Rasmussen rcu_read_unlock(); 1542b5067a8SAxel Rasmussen } 1552b5067a8SAxel Rasmussen 156*d01079f3SMel Gorman #define TRACE_MMAP_LOCK_EVENT(type, 
mm, ...) \ 157*d01079f3SMel Gorman do { \ 158*d01079f3SMel Gorman const char *memcg_path; \ 159*d01079f3SMel Gorman preempt_disable(); \ 160*d01079f3SMel Gorman memcg_path = get_mm_memcg_path(mm); \ 161*d01079f3SMel Gorman trace_mmap_lock_##type(mm, \ 162*d01079f3SMel Gorman memcg_path != NULL ? memcg_path : "", \ 163*d01079f3SMel Gorman ##__VA_ARGS__); \ 164*d01079f3SMel Gorman if (likely(memcg_path != NULL)) \ 165*d01079f3SMel Gorman put_memcg_path_buf(); \ 166*d01079f3SMel Gorman preempt_enable(); \ 167*d01079f3SMel Gorman } while (0) 168*d01079f3SMel Gorman 169*d01079f3SMel Gorman #else /* !CONFIG_MEMCG */ 170*d01079f3SMel Gorman 171*d01079f3SMel Gorman int trace_mmap_lock_reg(void) 172*d01079f3SMel Gorman { 173*d01079f3SMel Gorman return 0; 174*d01079f3SMel Gorman } 175*d01079f3SMel Gorman 176*d01079f3SMel Gorman void trace_mmap_lock_unreg(void) 177*d01079f3SMel Gorman { 178*d01079f3SMel Gorman } 179*d01079f3SMel Gorman 180*d01079f3SMel Gorman #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \ 181*d01079f3SMel Gorman trace_mmap_lock_##type(mm, "", ##__VA_ARGS__) 182*d01079f3SMel Gorman 183*d01079f3SMel Gorman #endif /* CONFIG_MEMCG */ 184*d01079f3SMel Gorman 185*d01079f3SMel Gorman #ifdef CONFIG_TRACING 186*d01079f3SMel Gorman #ifdef CONFIG_MEMCG 1872b5067a8SAxel Rasmussen /* 1882b5067a8SAxel Rasmussen * Write the given mm_struct's memcg path to a percpu buffer, and return a 1892b5067a8SAxel Rasmussen * pointer to it. If the path cannot be determined, or no buffer was available 1902b5067a8SAxel Rasmussen * (because the trace event is being unregistered), NULL is returned. 1912b5067a8SAxel Rasmussen * 1922b5067a8SAxel Rasmussen * Note: buffers are allocated per-cpu to avoid locking, so preemption must be 1932b5067a8SAxel Rasmussen * disabled by the caller before calling us, and re-enabled only after the 1942b5067a8SAxel Rasmussen * caller is done with the pointer. 
1952b5067a8SAxel Rasmussen * 1962b5067a8SAxel Rasmussen * The caller must call put_memcg_path_buf() once the buffer is no longer 1972b5067a8SAxel Rasmussen * needed. This must be done while preemption is still disabled. 1982b5067a8SAxel Rasmussen */ 1992b5067a8SAxel Rasmussen static const char *get_mm_memcg_path(struct mm_struct *mm) 2002b5067a8SAxel Rasmussen { 2012b5067a8SAxel Rasmussen char *buf = NULL; 2022b5067a8SAxel Rasmussen struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm); 2032b5067a8SAxel Rasmussen 2042b5067a8SAxel Rasmussen if (memcg == NULL) 2052b5067a8SAxel Rasmussen goto out; 2062b5067a8SAxel Rasmussen if (unlikely(memcg->css.cgroup == NULL)) 2072b5067a8SAxel Rasmussen goto out_put; 2082b5067a8SAxel Rasmussen 2092b5067a8SAxel Rasmussen buf = get_memcg_path_buf(); 2102b5067a8SAxel Rasmussen if (buf == NULL) 2112b5067a8SAxel Rasmussen goto out_put; 2122b5067a8SAxel Rasmussen 2132b5067a8SAxel Rasmussen cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE); 2142b5067a8SAxel Rasmussen 2152b5067a8SAxel Rasmussen out_put: 2162b5067a8SAxel Rasmussen css_put(&memcg->css); 2172b5067a8SAxel Rasmussen out: 2182b5067a8SAxel Rasmussen return buf; 2192b5067a8SAxel Rasmussen } 2202b5067a8SAxel Rasmussen 2212b5067a8SAxel Rasmussen #endif /* CONFIG_MEMCG */ 2222b5067a8SAxel Rasmussen 2232b5067a8SAxel Rasmussen /* 2242b5067a8SAxel Rasmussen * Trace calls must be in a separate file, as otherwise there's a circular 2252b5067a8SAxel Rasmussen * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h. 
2262b5067a8SAxel Rasmussen */ 2272b5067a8SAxel Rasmussen 2282b5067a8SAxel Rasmussen void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write) 2292b5067a8SAxel Rasmussen { 2302b5067a8SAxel Rasmussen TRACE_MMAP_LOCK_EVENT(start_locking, mm, write); 2312b5067a8SAxel Rasmussen } 2322b5067a8SAxel Rasmussen EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking); 2332b5067a8SAxel Rasmussen 2342b5067a8SAxel Rasmussen void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, 2352b5067a8SAxel Rasmussen bool success) 2362b5067a8SAxel Rasmussen { 2372b5067a8SAxel Rasmussen TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success); 2382b5067a8SAxel Rasmussen } 2392b5067a8SAxel Rasmussen EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned); 2402b5067a8SAxel Rasmussen 2412b5067a8SAxel Rasmussen void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write) 2422b5067a8SAxel Rasmussen { 2432b5067a8SAxel Rasmussen TRACE_MMAP_LOCK_EVENT(released, mm, write); 2442b5067a8SAxel Rasmussen } 2452b5067a8SAxel Rasmussen EXPORT_SYMBOL(__mmap_lock_do_trace_released); 246*d01079f3SMel Gorman #endif /* CONFIG_TRACING */ 247