xref: /linux/tools/testing/selftests/bpf/bpf_arena_strsearch.h (revision 24f171c7e145f43b9f187578e89b0982ce87e54c)
1*63066b7aSAlexei Starovoitov /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
2*63066b7aSAlexei Starovoitov /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
3*63066b7aSAlexei Starovoitov #pragma once
4*63066b7aSAlexei Starovoitov #include "bpf_arena_common.h"
5*63066b7aSAlexei Starovoitov 
6*63066b7aSAlexei Starovoitov __noinline int bpf_arena_strlen(const char __arena *s __arg_arena)
7*63066b7aSAlexei Starovoitov {
8*63066b7aSAlexei Starovoitov 	const char __arena *sc;
9*63066b7aSAlexei Starovoitov 
10*63066b7aSAlexei Starovoitov 	for (sc = s; *sc != '\0'; ++sc)
11*63066b7aSAlexei Starovoitov 		cond_break;
12*63066b7aSAlexei Starovoitov 	return sc - s;
13*63066b7aSAlexei Starovoitov }
14*63066b7aSAlexei Starovoitov 
15*63066b7aSAlexei Starovoitov /**
16*63066b7aSAlexei Starovoitov  * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
17*63066b7aSAlexei Starovoitov  * @pat: Shell-style pattern to match, e.g. "*.[ch]".
18*63066b7aSAlexei Starovoitov  * @str: String to match.  The pattern must match the entire string.
19*63066b7aSAlexei Starovoitov  *
20*63066b7aSAlexei Starovoitov  * Perform shell-style glob matching, returning true (1) if the match
21*63066b7aSAlexei Starovoitov  * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
22*63066b7aSAlexei Starovoitov  *
23*63066b7aSAlexei Starovoitov  * Pattern metacharacters are ?, *, [ and \.
24*63066b7aSAlexei Starovoitov  * (And, inside character classes, !, - and ].)
25*63066b7aSAlexei Starovoitov  *
26*63066b7aSAlexei Starovoitov  * This is small and simple implementation intended for device blacklists
27*63066b7aSAlexei Starovoitov  * where a string is matched against a number of patterns.  Thus, it
28*63066b7aSAlexei Starovoitov  * does not preprocess the patterns.  It is non-recursive, and run-time
29*63066b7aSAlexei Starovoitov  * is at most quadratic: strlen(@str)*strlen(@pat).
30*63066b7aSAlexei Starovoitov  *
31*63066b7aSAlexei Starovoitov  * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
32*63066b7aSAlexei Starovoitov  * it takes 6 passes over the pattern before matching the string.
33*63066b7aSAlexei Starovoitov  *
34*63066b7aSAlexei Starovoitov  * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
35*63066b7aSAlexei Starovoitov  * treat / or leading . specially; it isn't actually used for pathnames.
36*63066b7aSAlexei Starovoitov  *
37*63066b7aSAlexei Starovoitov  * Note that according to glob(7) (and unlike bash), character classes
38*63066b7aSAlexei Starovoitov  * are complemented by a leading !; this does not support the regex-style
39*63066b7aSAlexei Starovoitov  * [^a-z] syntax.
40*63066b7aSAlexei Starovoitov  *
41*63066b7aSAlexei Starovoitov  * An opening bracket without a matching close is matched literally.
42*63066b7aSAlexei Starovoitov  */
43*63066b7aSAlexei Starovoitov __noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena)
44*63066b7aSAlexei Starovoitov {
45*63066b7aSAlexei Starovoitov 	/*
46*63066b7aSAlexei Starovoitov 	 * Backtrack to previous * on mismatch and retry starting one
47*63066b7aSAlexei Starovoitov 	 * character later in the string.  Because * matches all characters
48*63066b7aSAlexei Starovoitov 	 * (no exception for /), it can be easily proved that there's
49*63066b7aSAlexei Starovoitov 	 * never a need to backtrack multiple levels.
50*63066b7aSAlexei Starovoitov 	 */
51*63066b7aSAlexei Starovoitov 	char const __arena *back_pat = NULL, *back_str;
52*63066b7aSAlexei Starovoitov 
53*63066b7aSAlexei Starovoitov 	/*
54*63066b7aSAlexei Starovoitov 	 * Loop over each token (character or class) in pat, matching
55*63066b7aSAlexei Starovoitov 	 * it against the remaining unmatched tail of str.  Return false
56*63066b7aSAlexei Starovoitov 	 * on mismatch, or true after matching the trailing nul bytes.
57*63066b7aSAlexei Starovoitov 	 */
58*63066b7aSAlexei Starovoitov 	for (;;) {
59*63066b7aSAlexei Starovoitov 		unsigned char c = *str++;
60*63066b7aSAlexei Starovoitov 		unsigned char d = *pat++;
61*63066b7aSAlexei Starovoitov 
62*63066b7aSAlexei Starovoitov 		switch (d) {
63*63066b7aSAlexei Starovoitov 		case '?':	/* Wildcard: anything but nul */
64*63066b7aSAlexei Starovoitov 			if (c == '\0')
65*63066b7aSAlexei Starovoitov 				return false;
66*63066b7aSAlexei Starovoitov 			break;
67*63066b7aSAlexei Starovoitov 		case '*':	/* Any-length wildcard */
68*63066b7aSAlexei Starovoitov 			if (*pat == '\0')	/* Optimize trailing * case */
69*63066b7aSAlexei Starovoitov 				return true;
70*63066b7aSAlexei Starovoitov 			back_pat = pat;
71*63066b7aSAlexei Starovoitov 			back_str = --str;	/* Allow zero-length match */
72*63066b7aSAlexei Starovoitov 			break;
73*63066b7aSAlexei Starovoitov 		case '[': {	/* Character class */
74*63066b7aSAlexei Starovoitov 			bool match = false, inverted = (*pat == '!');
75*63066b7aSAlexei Starovoitov 			char const __arena *class = pat + inverted;
76*63066b7aSAlexei Starovoitov 			unsigned char a = *class++;
77*63066b7aSAlexei Starovoitov 
78*63066b7aSAlexei Starovoitov 			/*
79*63066b7aSAlexei Starovoitov 			 * Iterate over each span in the character class.
80*63066b7aSAlexei Starovoitov 			 * A span is either a single character a, or a
81*63066b7aSAlexei Starovoitov 			 * range a-b.  The first span may begin with ']'.
82*63066b7aSAlexei Starovoitov 			 */
83*63066b7aSAlexei Starovoitov 			do {
84*63066b7aSAlexei Starovoitov 				unsigned char b = a;
85*63066b7aSAlexei Starovoitov 
86*63066b7aSAlexei Starovoitov 				if (a == '\0')	/* Malformed */
87*63066b7aSAlexei Starovoitov 					goto literal;
88*63066b7aSAlexei Starovoitov 
89*63066b7aSAlexei Starovoitov 				if (class[0] == '-' && class[1] != ']') {
90*63066b7aSAlexei Starovoitov 					b = class[1];
91*63066b7aSAlexei Starovoitov 
92*63066b7aSAlexei Starovoitov 					if (b == '\0')
93*63066b7aSAlexei Starovoitov 						goto literal;
94*63066b7aSAlexei Starovoitov 
95*63066b7aSAlexei Starovoitov 					class += 2;
96*63066b7aSAlexei Starovoitov 					/* Any special action if a > b? */
97*63066b7aSAlexei Starovoitov 				}
98*63066b7aSAlexei Starovoitov 				match |= (a <= c && c <= b);
99*63066b7aSAlexei Starovoitov 				cond_break;
100*63066b7aSAlexei Starovoitov 			} while ((a = *class++) != ']');
101*63066b7aSAlexei Starovoitov 
102*63066b7aSAlexei Starovoitov 			if (match == inverted)
103*63066b7aSAlexei Starovoitov 				goto backtrack;
104*63066b7aSAlexei Starovoitov 			pat = class;
105*63066b7aSAlexei Starovoitov 			}
106*63066b7aSAlexei Starovoitov 			break;
107*63066b7aSAlexei Starovoitov 		case '\\':
108*63066b7aSAlexei Starovoitov 			d = *pat++;
109*63066b7aSAlexei Starovoitov 			__attribute__((__fallthrough__));
110*63066b7aSAlexei Starovoitov 		default:	/* Literal character */
111*63066b7aSAlexei Starovoitov literal:
112*63066b7aSAlexei Starovoitov 			if (c == d) {
113*63066b7aSAlexei Starovoitov 				if (d == '\0')
114*63066b7aSAlexei Starovoitov 					return true;
115*63066b7aSAlexei Starovoitov 				break;
116*63066b7aSAlexei Starovoitov 			}
117*63066b7aSAlexei Starovoitov backtrack:
118*63066b7aSAlexei Starovoitov 			if (c == '\0' || !back_pat)
119*63066b7aSAlexei Starovoitov 				return false;	/* No point continuing */
120*63066b7aSAlexei Starovoitov 			/* Try again from last *, one character later in str. */
121*63066b7aSAlexei Starovoitov 			pat = back_pat;
122*63066b7aSAlexei Starovoitov 			str = ++back_str;
123*63066b7aSAlexei Starovoitov 			break;
124*63066b7aSAlexei Starovoitov 		}
125*63066b7aSAlexei Starovoitov 		cond_break;
126*63066b7aSAlexei Starovoitov 	}
127*63066b7aSAlexei Starovoitov 	return false;
128*63066b7aSAlexei Starovoitov }
129