bpf/prog_tests/usdt.c

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>

#define _SDT_HAS_SEMAPHORES 1
#include "../sdt.h"

#include "test_usdt.skel.h"
#include "test_urandom_usdt.skel.h"

int lets_test_this(int);

static volatile int idx = 2;
static volatile __u64 bla = 0xFEDCBA9876543210ULL;
static volatile short nums[] = {-1, -2, -3, -4};

static volatile struct {
	int x;
	signed char y;
} t1 = { 1, -127 };

#define SEC(name) __attribute__((section(name), used))

unsigned short test_usdt0_semaphore SEC(".probes");
unsigned short test_usdt3_semaphore SEC(".probes");
unsigned short test_usdt12_semaphore SEC(".probes");

static void __always_inline trigger_func(int x) {
	long y = 42;

	if (test_usdt0_semaphore)
		STAP_PROBE(test, usdt0);
	if (test_usdt3_semaphore)
		STAP_PROBE3(test, usdt3, x, y, &bla);
	if (test_usdt12_semaphore) {
		STAP_PROBE12(test, usdt12,
			     x, x + 1, y, x + y, 5,
			     y / 7, bla, &bla, -9, nums[x],
			     nums[idx], t1.y);
	}
}

#if defined(__x86_64__) || defined(__i386__)
/*
 * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)"
 * - 'size' is the size in bytes of the array element, and its sign indicates
 *   whether the type is signed (negative) or unsigned (positive).
 * - 'base_reg' is the register holding the base address, normally rdx or edx
 * - 'index_reg' is the register holding the index, normally rax or eax
 * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the
 *    size of the element type.
 *
 * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be:
 * - size: -2 (negative because 'short' is signed)
 * - scale: 2 (since sizeof(short) == 2)
 *
 * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386
 */
static volatile short array[] = {-1, -2, -3, -4};

#if defined(__x86_64__)
#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2)
#else
#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2)
#endif

unsigned short test_usdt_sib_semaphore SEC(".probes");

static void trigger_sib_spec(void)
{
	/*
	 * Force SIB addressing with inline assembly.
	 *
	 * You must compile with -std=gnu99 or -std=c99 to use the
	 * STAP_PROBE_ASM macro.
	 *
	 * The STAP_PROBE_ASM macro generates a quoted string that gets
	 * inserted between the surrounding assembly instructions. In this
	 * case, USDT_SIB_ARG_SPEC is embedded directly into the instruction
	 * stream, creating a probe point between the asm statement boundaries.
	 * It works fine with gcc/clang.
	 *
	 * Register constraints:
	 * - "d"(array): Binds the 'array' variable to %rdx or %edx register
	 * - "a"(0): Binds the constant 0 to %rax or %eax register
	 * These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and
	 * %%rax(%eax), they contain the expected values for SIB addressing.
	 *
	 * The "memory" clobber prevents the compiler from reordering memory
	 * accesses around the probe point, ensuring that the probe behavior
	 * is predictable and consistent.
	 */
	asm volatile(
		STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC)
		:
		: "d"(array), "a"(0)
		: "memory"
	);
}
#endif

static void subtest_basic_usdt(bool optimized)
{
	LIBBPF_OPTS(bpf_usdt_opts, opts);
	struct test_usdt *skel;
	struct test_usdt__bss *bss;
	int err, i, called;
	const __u64 expected_cookie = 0xcafedeadbeeffeed;

#define TRIGGER(x) ({			\
	trigger_func(x);		\
	if (optimized)			\
		trigger_func(x);	\
	optimized ? 2 : 1;		\
	})

	skel = test_usdt__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open"))
		return;

	bss = skel->bss;
	bss->my_pid = getpid();

	err = test_usdt__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto cleanup;

	/* usdt0 won't be auto-attached */
	opts.usdt_cookie = expected_cookie;
	skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
						     0 /*self*/, "/proc/self/exe",
						     "test", "usdt0", &opts);
	if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
		goto cleanup;

#if defined(__x86_64__) || defined(__i386__)
	opts.usdt_cookie = expected_cookie;
	skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
							 0 /*self*/, "/proc/self/exe",
							 "test", "usdt_sib", &opts);
	if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
		goto cleanup;
#endif

	alled = TRIGGER(1);

	ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
	ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
	ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");

	ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
	ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
	ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
	ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");

	/* auto-attached usdt3 gets default zero cookie value */
	ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
	ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");

	ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
	ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
	ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
	ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
	ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
	ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
	ASSERT_EQ(bss->usdt3_arg_sizes[0], 4, "usdt3_arg1_size");
	ASSERT_EQ(bss->usdt3_arg_sizes[1], 8, "usdt3_arg2_size");
	ASSERT_EQ(bss->usdt3_arg_sizes[2], 8, "usdt3_arg3_size");

	/* auto-attached usdt12 gets default zero cookie value */
	ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
	ASSERT_EQ(bss->usdt12_arg_cnt, 12, "usdt12_arg_cnt");

	ASSERT_EQ(bss->usdt12_args[0], 1, "usdt12_arg1");
	ASSERT_EQ(bss->usdt12_args[1], 1 + 1, "usdt12_arg2");
	ASSERT_EQ(bss->usdt12_args[2], 42, "usdt12_arg3");
	ASSERT_EQ(bss->usdt12_args[3], 42 + 1, "usdt12_arg4");
	ASSERT_EQ(bss->usdt12_args[4], 5, "usdt12_arg5");
	ASSERT_EQ(bss->usdt12_args[5], 42 / 7, "usdt12_arg6");
	ASSERT_EQ(bss->usdt12_args[6], bla, "usdt12_arg7");
	ASSERT_EQ(bss->usdt12_args[7], (uintptr_t)&bla, "usdt12_arg8");
	ASSERT_EQ(bss->usdt12_args[8], -9, "usdt12_arg9");
	ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10");
	ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
	ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");

	int usdt12_expected_arg_sizes[12] = { 4, 4, 8, 8, 4, 8, 8, 8, 4, 2, 2, 1 };

	for (i = 0; i < 12; i++)
		ASSERT_EQ(bss->usdt12_arg_sizes[i], usdt12_expected_arg_sizes[i], "usdt12_arg_size");

	/* trigger_func() is marked __always_inline, so USDT invocations will be
	 * inlined in two different places, meaning that each USDT will have
	 * at least 2 different places to be attached to. This verifies that
	 * bpf_program__attach_usdt() handles this properly and attaches to
	 * all possible places of USDT invocation.
	 */
	called += TRIGGER(2);

	ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
	ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
	ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");

	/* only check values that depend on trigger_func()'s input value */
	ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");

	ASSERT_EQ(bss->usdt12_args[0], 2, "usdt12_arg1");
	ASSERT_EQ(bss->usdt12_args[1], 2 + 1, "usdt12_arg2");
	ASSERT_EQ(bss->usdt12_args[3], 42 + 2, "usdt12_arg4");
	ASSERT_EQ(bss->usdt12_args[9], nums[2], "usdt12_arg10");

	/* detach and re-attach usdt3 */
	bpf_link__destroy(skel->links.usdt3);

	opts.usdt_cookie = 0xBADC00C51E;
	skel->links.usdt3 = bpf_program__attach_usdt(skel->progs.usdt3, -1 /* any pid */,
						     "/proc/self/exe", "test", "usdt3", &opts);
	if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
		goto cleanup;

	called += TRIGGER(3);

	ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
	/* this time usdt3 has custom cookie */
	ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
	ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");

	ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
	ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
	ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
	ASSERT_EQ(bss->usdt3_args[0], 3, "usdt3_arg1");
	ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
	ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");

#if defined(__x86_64__) || defined(__i386__)
	trigger_sib_spec();
	ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
	ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
	ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
	ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
	ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
	ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
#endif

cleanup:
	test_usdt__destroy(skel);
#undef TRIGGER
}

unsigned short test_usdt_100_semaphore SEC(".probes");
unsigned short test_usdt_300_semaphore SEC(".probes");
unsigned short test_usdt_400_semaphore SEC(".probes");

#define R10(F, X)  F(X+0); F(X+1);F(X+2); F(X+3); F(X+4); \
		   F(X+5); F(X+6); F(X+7); F(X+8); F(X+9);
#define R100(F, X) R10(F,X+ 0);R10(F,X+10);R10(F,X+20);R10(F,X+30);R10(F,X+40); \
		   R10(F,X+50);R10(F,X+60);R10(F,X+70);R10(F,X+80);R10(F,X+90);

/* carefully control that we get exactly 100 inlines by preventing inlining */
static void __always_inline f100(int x)
{
	STAP_PROBE1(test, usdt_100, x);
}

__weak void trigger_100_usdts(void)
{
	R100(f100, 0);
}

/* we shouldn't be able to attach to test:usdt2_300 USDT as we don't have as
 * many slots for specs. It's important that each STAP_PROBE2() invocation
 * (after untolling) gets different arg spec due to compiler inlining i as
 * a constant
 */
static void __always_inline f300(int x)
{
	STAP_PROBE1(test, usdt_300, x);
}

__weak void trigger_300_usdts(void)
{
	R100(f300, 0);
	R100(f300, 100);
	R100(f300, 200);
}

static void __always_inline f400(int x __attribute__((unused)))
{
	STAP_PROBE1(test, usdt_400, 400);
}

/* this time we have 400 different USDT call sites, but they have uniform
 * argument location, so libbpf's spec string deduplication logic should keep
 * spec count use very small and so we should be able to attach to all 400
 * call sites
 */
__weak void trigger_400_usdts(void)
{
	R100(f400, 0);
	R100(f400, 100);
	R100(f400, 200);
	R100(f400, 300);
}

static void subtest_multispec_usdt(void)
{
	LIBBPF_OPTS(bpf_usdt_opts, opts);
	struct test_usdt *skel;
	struct test_usdt__bss *bss;
	int err, i;

	skel = test_usdt__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open"))
		return;

	bss = skel->bss;
	bss->my_pid = getpid();

	err = test_usdt__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto cleanup;

	/* usdt_100 is auto-attached and there are 100 inlined call sites,
	 * let's validate that all of them are properly attached to and
	 * handled from BPF side
	 */
	trigger_100_usdts();

	ASSERT_EQ(bss->usdt_100_called, 100, "usdt_100_called");
	ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");

	/* Stress test free spec ID tracking. By default libbpf allows up to
	 * 256 specs to be used, so if we don't return free spec IDs back
	 * after few detachments and re-attachments we should run out of
	 * available spec IDs.
	 */
	for (i = 0; i < 2; i++) {
		bpf_link__destroy(skel->links.usdt_100);

		skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
							        "/proc/self/exe",
								"test", "usdt_100", NULL);
		if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_100_reattach"))
			goto cleanup;

		bss->usdt_100_sum = 0;
		trigger_100_usdts();

		ASSERT_EQ(bss->usdt_100_called, (i + 1) * 100 + 100, "usdt_100_called");
		ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
	}

	/* Now let's step it up and try to attach USDT that requires more than
	 * 256 attach points with different specs for each.
	 * Note that we need trigger_300_usdts() only to actually have 300
	 * USDT call sites, we are not going to actually trace them.
	 */
	trigger_300_usdts();

	bpf_link__destroy(skel->links.usdt_100);

	bss->usdt_100_called = 0;
	bss->usdt_100_sum = 0;

	/* If built with arm64/clang, there will be much less number of specs
	 * for usdt_300 call sites.
	 */
#if !defined(__aarch64__) || !defined(__clang__)
	/* we'll reuse usdt_100 BPF program for usdt_300 test */
	skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
							"test", "usdt_300", NULL);
	err = -errno;
	if (!ASSERT_ERR_PTR(skel->links.usdt_100, "usdt_300_bad_attach"))
		goto cleanup;
	ASSERT_EQ(err, -E2BIG, "usdt_300_attach_err");

	/* let's check that there are no "dangling" BPF programs attached due
	 * to partial success of the above test:usdt_300 attachment
	 */
	f300(777); /* this is 301st instance of usdt_300 */

	ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
	ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
#endif

	/* This time we have USDT with 400 inlined invocations, but arg specs
	 * should be the same across all sites, so libbpf will only need to
	 * use one spec and thus we'll be able to attach 400 uprobes
	 * successfully.
	 *
	 * Again, we are reusing usdt_100 BPF program.
	 */
	skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
							"/proc/self/exe",
							"test", "usdt_400", NULL);
	if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_400_attach"))
		goto cleanup;

	trigger_400_usdts();

	ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called");
	ASSERT_EQ(bss->usdt_100_sum, 400 * 400, "usdt_400_sum");

cleanup:
	test_usdt__destroy(skel);
}

static FILE *urand_spawn(int *pid)
{
	FILE *f;

	/* urandom_read's stdout is wired into f */
	f = popen("./urandom_read 1 report-pid", "r");
	if (!f)
		return NULL;

	if (fscanf(f, "%d", pid) != 1) {
		pclose(f);
		errno = EINVAL;
		return NULL;
	}

	return f;
}

static int urand_trigger(FILE **urand_pipe)
{
	int exit_code;

	/* pclose() waits for child process to exit and returns their exit code */
	exit_code = pclose(*urand_pipe);
	*urand_pipe = NULL;

	return exit_code;
}

static void subtest_urandom_usdt(bool auto_attach)
{
	struct test_urandom_usdt *skel;
	struct test_urandom_usdt__bss *bss;
	struct bpf_link *l;
	FILE *urand_pipe = NULL;
	int err, urand_pid = 0;

	skel = test_urandom_usdt__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open"))
		return;

	urand_pipe = urand_spawn(&urand_pid);
	if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
		goto cleanup;

	bss = skel->bss;
	bss->urand_pid = urand_pid;

	if (auto_attach) {
		err = test_urandom_usdt__attach(skel);
		if (!ASSERT_OK(err, "skel_auto_attach"))
			goto cleanup;
	} else {
		l = bpf_program__attach_usdt(skel->progs.urand_read_without_sema,
					     urand_pid, "./urandom_read",
					     "urand", "read_without_sema", NULL);
		if (!ASSERT_OK_PTR(l, "urand_without_sema_attach"))
			goto cleanup;
		skel->links.urand_read_without_sema = l;

		l = bpf_program__attach_usdt(skel->progs.urand_read_with_sema,
					     urand_pid, "./urandom_read",
					     "urand", "read_with_sema", NULL);
		if (!ASSERT_OK_PTR(l, "urand_with_sema_attach"))
			goto cleanup;
		skel->links.urand_read_with_sema = l;

		l = bpf_program__attach_usdt(skel->progs.urandlib_read_without_sema,
					     urand_pid, "./liburandom_read.so",
					     "urandlib", "read_without_sema", NULL);
		if (!ASSERT_OK_PTR(l, "urandlib_without_sema_attach"))
			goto cleanup;
		skel->links.urandlib_read_without_sema = l;

		l = bpf_program__attach_usdt(skel->progs.urandlib_read_with_sema,
					     urand_pid, "./liburandom_read.so",
					     "urandlib", "read_with_sema", NULL);
		if (!ASSERT_OK_PTR(l, "urandlib_with_sema_attach"))
			goto cleanup;
		skel->links.urandlib_read_with_sema = l;

	}

	/* trigger urandom_read USDTs */
	ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");

	ASSERT_EQ(bss->urand_read_without_sema_call_cnt, 1, "urand_wo_sema_cnt");
	ASSERT_EQ(bss->urand_read_without_sema_buf_sz_sum, 256, "urand_wo_sema_sum");

	ASSERT_EQ(bss->urand_read_with_sema_call_cnt, 1, "urand_w_sema_cnt");
	ASSERT_EQ(bss->urand_read_with_sema_buf_sz_sum, 256, "urand_w_sema_sum");

	ASSERT_EQ(bss->urandlib_read_without_sema_call_cnt, 1, "urandlib_wo_sema_cnt");
	ASSERT_EQ(bss->urandlib_read_without_sema_buf_sz_sum, 256, "urandlib_wo_sema_sum");

	ASSERT_EQ(bss->urandlib_read_with_sema_call_cnt, 1, "urandlib_w_sema_cnt");
	ASSERT_EQ(bss->urandlib_read_with_sema_buf_sz_sum, 256, "urandlib_w_sema_sum");

cleanup:
	if (urand_pipe)
		pclose(urand_pipe);
	test_urandom_usdt__destroy(skel);
}

void test_usdt(void)
{
	if (test__start_subtest("basic"))
		subtest_basic_usdt(false);
#ifdef __x86_64__
	if (test__start_subtest("basic_optimized"))
		subtest_basic_usdt(true);
#endif
	if (test__start_subtest("multispec"))
		subtest_multispec_usdt();
	if (test__start_subtest("urand_auto_attach"))
		subtest_urandom_usdt(true /* auto_attach */);
	if (test__start_subtest("urand_pid_attach"))
		subtest_urandom_usdt(false /* auto_attach */);
}