# xref: /freebsd/lib/libc/tests/regex/multibyte.sh (revision 8f7ed58a15556bf567ff876e1999e4fe4d684e1d)
# Declare the "bmpat" test case (implemented by bmpat_head/bmpat_body).
atf_test_case bmpat
# Metadata for the "bmpat" test case: sets its description.
bmpat_head()
{
	atf_set "descr" "Check matching multibyte characters (PR153502)"
}
# Body of the "bmpat" test case.
#
# Feeds short strings containing the two-byte UTF-8 character 'é' to sed and
# checks that '.' (and '.*') in the pattern consumes it as a single
# character, so that each input line is printed unchanged.
bmpat_body()
{
	# The patterns below only match if sed treats 'é' as one character,
	# so a UTF-8 character type is selected explicitly.
	export LC_CTYPE="C.UTF-8"

	# Each check: the printf output is piped to sed through atf_check and
	# the expected stdout is the input itself.
	printf 'é' | atf_check -o "inline:é" \
	    sed -ne '/^.$/p'
	printf 'éé' | atf_check -o "inline:éé" \
	    sed -ne '/^..$/p'
	printf 'aéa' | atf_check -o "inline:aéa" \
	    sed -ne '/a.a/p'
	printf 'aéa' | atf_check -o "inline:aéa" \
	    sed -ne '/a.*a/p'
	printf 'aaéaa' | atf_check -o "inline:aaéaa" \
	    sed -ne '/aa.aa/p'
	printf 'aéaéa' | atf_check -o "inline:aéaéa" \
	    sed -ne '/a.a.a/p'
	printf 'éa' | atf_check -o "inline:éa" \
	    sed -ne '/.a/p'
	printf 'aéaa' | atf_check -o "inline:aéaa" \
	    sed -ne '/a.aa/p'
	printf 'éaé' | atf_check -o "inline:éaé" \
	    sed -ne '/.a./p'
}
2963cbe8d1SYuri Pankov
# Declare the "icase" test case (implemented by icase_head/icase_body).
atf_test_case icase
# Metadata for the "icase" test case: sets its description.
icase_head()
{
	atf_set "descr" "Check case-insensitive matching for characters 128-255"
}
# Body of the "icase" test case.
#
# Uses U+00B5 as a case-insensitive pattern (sed's "I" flag) and checks that
# it matches both U+039C and U+03BC, i.e. that each of those input lines is
# printed back unchanged.
icase_body()
{
	export LC_CTYPE="C.UTF-8"

	# UTF-8 encodings written as octal escapes so the bytes are explicit.
	a=$(printf '\302\265')	# U+00B5
	b=$(printf '\316\234')	# U+039C
	c=$(printf '\316\274')	# U+03BC

	# printf with a quoted argument emits the character plus a newline
	# without subjecting it to word splitting or echo option parsing.
	printf '%s\n' "$b" | atf_check -o "inline:$b\n" sed -ne "/$a/Ip"
	printf '%s\n' "$c" | atf_check -o "inline:$c\n" sed -ne "/$a/Ip"
}
46547bc083SYuri Pankov
# Declare the "mbset" test case; "cleanup" registers mbset_cleanup as well.
atf_test_case mbset cleanup
# Metadata for the "mbset" test case: sets its description.
mbset_head()
{
	atf_set "descr" "Check multibyte sets matching"
}
# Body of the "mbset" test case.
#
# For each bracket expression, writes every member of the set in turn to the
# scratch file "mbset" and checks that sed prints it (non-empty output).
# The whole series is run once with basic (-ne) and once with extended
# (-Ene) regular expressions.
mbset_body()
{
	export LC_CTYPE="C.UTF-8"

	# This involved an erroneously implemented optimization which reduces
	# single-element sets to an exact match with a single codepoint.
	# Match sets record small-codepoint characters in a bitmap and
	# large-codepoint characters in an array; the optimization would falsely
	# trigger if either the bitmap or the array was a singleton, ignoring
	# the members of the other side of the set.
	#
	# To exercise this, we construct sets which have one member of one side
	# and one or more of the other, and verify that all members can be
	# found.
	for mode in -ne -Ene; do
		# One ASCII member, one multibyte member.
		for ch in a à; do
			printf '%s' "$ch" > mbset
			atf_check -o not-empty sed "$mode" '/[aà]/p' mbset
		done
		# One ASCII member, two multibyte members.
		for ch in a à á; do
			printf '%s' "$ch" > mbset
			atf_check -o not-empty sed "$mode" '/[aàá]/p' mbset
		done
		# Two ASCII members, one multibyte member.
		for ch in à a b; do
			printf '%s' "$ch" > mbset
			atf_check -o not-empty sed "$mode" '/[abà]/p' mbset
		done
	done
}
# Cleanup for the "mbset" test case: removes the scratch file written by
# mbset_body.  -f keeps this silent and successful if the file is absent.
mbset_cleanup()
{
	rm -f mbset
}
87*8f7ed58aSBill Sommerfeld
# Registers every test case defined in this file, in declaration order.
atf_init_test_cases()
{
	atf_add_test_case bmpat
	atf_add_test_case icase
	atf_add_test_case mbset
}
94