// SPDX-License-Identifier: GPL-2.0
#define CREATE_TRACE_POINTS
#include <trace/events/mmap_lock.h>

#include <linux/mm.h>
#include <linux/cgroup.h>
#include <linux/memcontrol.h>
#include <linux/mmap_lock.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/trace_events.h>
#include <linux/local_lock.h>

EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);

#ifdef CONFIG_MEMCG

/*
 * Our various events all share the same buffer (because we don't want or need
 * to allocate a set of buffers *per event type*), so we need to protect against
 * concurrent _reg() and _unreg() calls, and count how many _reg() calls have
 * been made.
 */
static DEFINE_MUTEX(reg_lock);
static int reg_refcount; /* Protected by reg_lock. */

/*
 * Size of the buffer for memcg path names. Ignoring stack trace support,
 * trace_events_hist.c uses MAX_FILTER_STR_VAL for this, so we also use it.
 */
#define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL

/*
 * How many contexts our trace events might be called in: normal, softirq, irq,
 * and NMI.
 */
#define CONTEXT_COUNT 4
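/*
 * Editor's note on sizing: trace_mmap_lock_reg() below allocates one buffer of
 * MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT bytes per possible CPU, so each nesting
 * context gets its own MEMCG_PATH_BUF_SIZE slice (1 KiB per CPU in total if
 * MAX_FILTER_STR_VAL has its usual value of 256).
 */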

struct memcg_path {
	local_lock_t lock;
	char __rcu *buf;
	local_t buf_idx;
};
static DEFINE_PER_CPU(struct memcg_path, memcg_paths) = {
	.lock = INIT_LOCAL_LOCK(lock),
	.buf_idx = LOCAL_INIT(0),
};
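/*
 * Roughly, the role of each field: "lock" is taken around every get/put pair
 * (see TRACE_MMAP_LOCK_EVENT below) so that one task's pair cannot interleave
 * with another task's on the same CPU; "buf" is the RCU-managed per-cpu
 * allocation; "buf_idx" acts as a stack pointer into the per-context slices
 * of that allocation.
 */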

static char **tmp_bufs;

/* Called with reg_lock held. */
static void free_memcg_path_bufs(void)
{
	struct memcg_path *memcg_path;
	int cpu;
	char **old = tmp_bufs;

	for_each_possible_cpu(cpu) {
		memcg_path = per_cpu_ptr(&memcg_paths, cpu);
		*(old++) = rcu_dereference_protected(memcg_path->buf,
			lockdep_is_held(&reg_lock));
		rcu_assign_pointer(memcg_path->buf, NULL);
	}

	/* Wait for inflight memcg_path_buf users to finish. */
	synchronize_rcu();

	old = tmp_bufs;
	for_each_possible_cpu(cpu) {
		kfree(*(old++));
	}

	kfree(tmp_bufs);
	tmp_bufs = NULL;
}
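/*
 * Note the two-phase teardown above: the buffers are first unpublished (so
 * readers under rcu_read_lock() start seeing NULL), and only after
 * synchronize_rcu() returns can the old allocations be kfree()d safely.
 * tmp_bufs merely stages the old pointers between the two phases.
 */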

int trace_mmap_lock_reg(void)
{
	int cpu;
	char *new;

	mutex_lock(&reg_lock);

	/* If the refcount is going 0->1, proceed with allocating buffers. */
	if (reg_refcount++)
		goto out;

	tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs),
				 GFP_KERNEL);
	if (tmp_bufs == NULL)
		goto out_fail;

	for_each_possible_cpu(cpu) {
		new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
		if (new == NULL)
			goto out_fail_free;
		rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new);
		/* Don't need to wait for inflights, they'd have gotten NULL. */
	}

out:
	mutex_unlock(&reg_lock);
	return 0;

out_fail_free:
	free_memcg_path_bufs();
out_fail:
	/* Since we failed, undo the earlier ref increment. */
	--reg_refcount;

	mutex_unlock(&reg_lock);
	return -ENOMEM;
}
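/*
 * trace_mmap_lock_reg()/trace_mmap_lock_unreg() are hooked up as the
 * reg/unreg callbacks of the mmap_lock events (see trace/events/mmap_lock.h),
 * so the buffers above exist only while at least one of those events is
 * enabled.
 */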

void trace_mmap_lock_unreg(void)
{
	mutex_lock(&reg_lock);

	/* If the refcount is going 1->0, proceed with freeing buffers. */
	if (--reg_refcount)
		goto out;

	free_memcg_path_bufs();

out:
	mutex_unlock(&reg_lock);
}

static inline char *get_memcg_path_buf(void)
{
	struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths);
	char *buf;
	int idx;

	rcu_read_lock();
	buf = rcu_dereference(memcg_path->buf);
	if (buf == NULL) {
		rcu_read_unlock();
		return NULL;
	}
	idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) -
	      MEMCG_PATH_BUF_SIZE;
	return &buf[idx];
}

static inline void put_memcg_path_buf(void)
{
	local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx);
	rcu_read_unlock();
}
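/*
 * An event can fire in up to CONTEXT_COUNT nested contexts on one CPU (e.g.
 * an irq arriving during a task-context event): each nested
 * get_memcg_path_buf() claims the next MEMCG_PATH_BUF_SIZE slice by advancing
 * buf_idx, and put_memcg_path_buf() retreats it, so slices are released in
 * LIFO order.
 */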

#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
	do {                                                                   \
		const char *memcg_path;                                        \
		local_lock(&memcg_paths.lock);                                 \
		memcg_path = get_mm_memcg_path(mm);                            \
		trace_mmap_lock_##type(mm,                                     \
				       memcg_path != NULL ? memcg_path : "",   \
				       ##__VA_ARGS__);                         \
		if (likely(memcg_path != NULL))                                \
			put_memcg_path_buf();                                  \
		local_unlock(&memcg_paths.lock);                               \
	} while (0)
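/*
 * For illustration, TRACE_MMAP_LOCK_EVENT(released, mm, write) expands
 * roughly to:
 *
 *	local_lock(&memcg_paths.lock);
 *	memcg_path = get_mm_memcg_path(mm);
 *	trace_mmap_lock_released(mm, memcg_path != NULL ? memcg_path : "",
 *				 write);
 *	if (likely(memcg_path != NULL))
 *		put_memcg_path_buf();
 *	local_unlock(&memcg_paths.lock);
 */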

#else /* !CONFIG_MEMCG */

int trace_mmap_lock_reg(void)
{
	return 0;
}

void trace_mmap_lock_unreg(void)
{
}

#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
	trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)

#endif /* CONFIG_MEMCG */

#ifdef CONFIG_TRACING
#ifdef CONFIG_MEMCG
/*
 * Write the given mm_struct's memcg path to a percpu buffer, and return a
 * pointer to it. If the path cannot be determined, or no buffer was available
 * (because the trace event is being unregistered), NULL is returned.
 *
 * Note: buffers are allocated per-cpu to avoid locking, so the caller must
 * hold this CPU's memcg_paths.lock (taken via local_lock()) before calling
 * us, and release it only after it is done with the pointer.
 *
 * The caller must call put_memcg_path_buf() once the buffer is no longer
 * needed. This must be done while the local lock is still held.
 */
static const char *get_mm_memcg_path(struct mm_struct *mm)
{
	char *buf = NULL;
	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);

	if (memcg == NULL)
		goto out;
	if (unlikely(memcg->css.cgroup == NULL))
		goto out_put;

	buf = get_memcg_path_buf();
	if (buf == NULL)
		goto out_put;

	cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE);

out_put:
	css_put(&memcg->css);
out:
	return buf;
}

#endif /* CONFIG_MEMCG */

/*
 * Trace calls must be in a separate file, as otherwise there's a circular
 * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
 */

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write)
{
	TRACE_MMAP_LOCK_EVENT(start_locking, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking);

void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success)
{
	TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned);

void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write)
{
	TRACE_MMAP_LOCK_EVENT(released, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_released);
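
/*
 * These helpers are not called directly: the inline wrappers in
 * include/linux/mmap_lock.h invoke them only when the corresponding
 * tracepoint is enabled, so with tracing off the mmap_lock fast paths pay
 * only a static-key check.
 */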
#endif /* CONFIG_TRACING */