xref: /freebsd/usr.bin/split/tests/split_test.sh (revision fd45b686f9d92f583366c75b22c04c7ee49709c0)
1#
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2022-2023 Klara Systems
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions
8# are met:
9# 1. Redistributions of source code must retain the above copyright
10#    notice, this list of conditions and the following disclaimer.
11# 2. Redistributions in binary form must reproduce the above copyright
12#    notice, this list of conditions and the following disclaimer in the
13#    documentation and/or other materials provided with the distribution.
14#
15# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25# SUCH DAMAGE.
26#
27
# Default buffer size used by the tests below, mirroring MAXBSIZE from
# sys/param.h.  The := form only assigns when MAXBSIZE is unset or empty, so
# a value supplied through the environment takes precedence.  The expansion
# is quoted so that the value is never word-split or globbed.
: "${MAXBSIZE:=65536}"
30
atf_test_case bytes
# Check that split -b cuts its input at exact byte offsets, first with a
# tiny input and then with pieces slightly larger than the default buffer.
bytes_body()
{
	# Round 1: three expected pieces of 4 bytes each.  The newlines are
	# counted like any other byte.
	printf "aaaa" > foo-aa
	printf "bb\nc" > foo-ab
	printf "ccc\n" > foo-ac

	cat foo-* > foo
	atf_check split -b 4 foo split-
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac

	# MAXBSIZE is the default buffer size, so we'll split at just a little
	# bit past the buffer size to make sure that it still properly splits
	# even when it needs to read again to hit the limit.
	bsize=$((MAXBSIZE + 12))

	# Start round 2 from a clean slate: drop the old fixtures and also the
	# outputs of round 1, so that a piece split fails to create shows up
	# as a missing file instead of being compared against stale data.
	rm foo-* foo
	atf_check rm split-*

	# Round 2: two full pieces of bsize bytes and a 12-byte remainder.
	jot -ns "" -b "a" "${bsize}" > foo-aa
	jot -ns "" -b "b" "${bsize}" > foo-ab
	jot -ns "" -b "c" 12 > foo-ac

	cat foo-* > foo
	atf_check split -b "${bsize}" foo split-
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac
}
59
atf_test_case chunks
# Check that split -n divides a file into the requested number of pieces.
chunks_body()
{
	chunks=3

	# Input, written without any newline: 4096 a's, 4096 b's and 4104
	# c's, i.e. 12296 bytes in total.
	{
		jot -ns "" -b "a" 4096
		jot -ns "" -b "b" 4096
		jot -ns "" -b "c" 4104
	} > foo

	# Expected pieces of 4098, 4098 and 4100 bytes.  Neither boundary
	# falls between two runs of letters.
	{
		jot -ns "" -b "a" 4096
		jot -ns "" -b "b" 2
	} > foo-aa
	{
		jot -ns "" -b "b" 4094
		jot -ns "" -b "c" 4
	} > foo-ab
	jot -ns "" -b "c" 4100 > foo-ac

	atf_check split -n "${chunks}" foo split-
	for piece in aa ab ac; do
		atf_check -o "file:foo-${piece}" cat "split-${piece}"
	done
}
79
atf_test_case sensible_lines
# Check line-based splitting (-l) from a named file and from stdin, with one
# and with two lines per output file.
sensible_lines_body()
{
	# One fixture per input line; together they form the input file.
	printf '%s\n' "The quick brown fox" > foo-aa
	printf '%s\n' "jumps over" > foo-ab
	printf '%s\n' "the lazy dog" > foo-ac
	cat foo-* > foo

	# Round 1: named input file, one line per output file.
	atf_check split -l 1 foo split-
	for sfx in aa ab ac; do
		atf_check -o "file:foo-${sfx}" cat "split-${sfx}"
	done

	# Round 2: the same split, but with `-` as the file operand, which is
	# documented to mean stdin.  The command is handed to atf_check -x as
	# one string so that the redirection is part of the checked command.
	atf_check rm split-*
	atf_check -x 'split -l 1 - split- < foo'
	for sfx in aa ab ac; do
		atf_check -o "file:foo-${sfx}" cat "split-${sfx}"
	done

	# Round 3: with -l 2 the three lines should come out as a 2/1 split
	# rather than the 1/1/1 split seen above.
	cat foo-aa foo-ab > foo-aa-ng
	cat foo-ac > foo-ab-ng

	atf_check rm split-*
	atf_check split -l 2 foo split-
	for sfx in aa ab; do
		atf_check -o "file:foo-${sfx}-ng" cat "split-${sfx}"
	done
}
111
atf_test_case long_lines
# Check that -l 1 keeps lines intact even when a line is as long as, or
# longer than, MAXBSIZE.
long_lines_body()
{
	# The input consists of three lines:
	#   1. MAXBSIZE a's
	#   2. MAXBSIZE b's immediately followed by MAXBSIZE c's
	#   3. 1024 d's
	#
	# The historical split(1) never grew its internal buffer, so with
	# -l 1 two of the three split- files came out wrong: split-aa held
	# most of the first two lines, split-ab a tiny fraction of the second
	# line, and split-ac the third line.
	#
	# Recent split(1) keeps growing the buffer until the line fits or
	# memory runs out.
	jot -s "" -b "a" "${MAXBSIZE}" > foo-aa
	{
		# -n on the first half: b's and c's share a single line.
		jot -ns "" -b "b" "${MAXBSIZE}"
		jot -s "" -b "c" "${MAXBSIZE}"
	} > foo-ab
	jot -s "" -b "d" 1024 > foo-ac
	cat foo-* > foo

	atf_check split -l 1 foo split-

	for sfx in aa ab ac; do
		atf_check -o "file:foo-${sfx}" cat "split-${sfx}"
	done
}
140
atf_test_case numeric_suffix
# Check that -d names the output files with numeric suffixes (00, 01, ...).
numeric_suffix_body()
{
	# One fixture per input line, named after the expected output file.
	printf '%s\n' "The quick brown fox" > foo-00
	printf '%s\n' "jumps over" > foo-01
	printf '%s\n' "the lazy dog" > foo-02
	cat foo-* > foo

	atf_check split -d -l 1 foo split-

	for num in 00 01 02; do
		atf_check -o "file:foo-${num}" cat "split-${num}"
	done
}
155
atf_test_case larger_suffix_length
# Check that -a 3 combined with -d yields three-digit numeric suffixes.
larger_suffix_length_body()
{
	# Start from an empty input file; the loop below appends to it.
	: > foo

	# Build foo-000 through foo-011.  Fixture number N holds a single
	# line of N + 1 a's and is appended to the input as it is created.
	for idx in $(seq -w 0 11); do
		piece="foo-0${idx}"
		# One leading zero is stripped before the arithmetic so that
		# values such as 08 are not read as (invalid) octal numbers.
		jot -s "" -b "a" $((${idx##0} + 1)) > "${piece}"
		cat "${piece}" >> foo
	done

	atf_check split -a 3 -d -l 1 foo split-

	# Every output file must match the fixture with the same number.
	for idx in $(seq -w 0 11); do
		atf_check -o "file:foo-0${idx}" cat "split-0${idx}"
	done
}
176
atf_test_case pattern
# Check pattern-based splitting (-p).
pattern_body()
{
	# Some fake yaml makes for a realistic -p use-case: the input is cut
	# at its top-level stanzas.  Each fixture holds one stanza.
	printf '%s\n' \
	    "cat:" \
	    "  aa: true" \
	    "  ab: true" \
	    "  ac: true" > foo-aa
	printf '%s\n' \
	    "dog:" \
	    "  ba: true" \
	    "  bb: true" \
	    "  bc: true" > foo-ab

	cat foo-* > foo

	# The pattern only matches the unindented "name:" lines, so every
	# stanza is expected to end up in an output file of its own.
	atf_check split -p "^[^[:space:]]+:" foo split-
	for sfx in aa ab; do
		atf_check -o "file:foo-${sfx}" cat "split-${sfx}"
	done
}
202
atf_test_case autoextend
# Check that the suffix grows on its own once the short names are used up.
autoextend_body()
{
	# Line 26 * 25 (650) is expected in xyz; one more line must end up in
	# a file with a longer suffix, xzaaa.
	short_names=$((26 * 25))

	seq $((short_names + 1)) >input
	atf_check split -l1 input
	atf_check -o inline:"${short_names}\n" cat xyz
	atf_check -o inline:"$((short_names + 1))\n" cat xzaaa
}
211
atf_test_case noautoextend
# Check that an explicit -a2 keeps every suffix at two letters.
noautoextend_body()
{
	# With 26 * 26 (676) one-line pieces, the last line is expected in
	# xzz, the final two-letter name.
	total=$((26 * 26))

	seq "${total}" >input
	atf_check split -a2 -l1 input
	atf_check -o inline:"${total}\n" cat xzz
}
219
atf_test_case reautoextend
# Check that a later -a0 undoes an earlier -a2.
reautoextend_body()
{
	# The expectations match those of the plain auto-extend case: line
	# 26 * 25 (650) in xyz and the following line in xzaaa.
	short_names=$((26 * 25))

	seq $((short_names + 1)) >input
	atf_check split -a2 -a0 -l1 input
	atf_check -o inline:"${short_names}\n" cat xyz
	atf_check -o inline:"$((short_names + 1))\n" cat xzaaa
}
228
atf_test_case continue
# Check that -c writes to the next unused output name.
continue_body()
{
	printf '%s\n' hello >input

	# A plain run puts the whole input into xaa and creates no xab; the
	# failing cat (exit status 1) is how the absence of xab is asserted.
	atf_check split input
	atf_check -o file:input cat xaa
	atf_check -s exit:1 -e ignore cat xab

	# Running again with -c is expected to produce xab.
	atf_check split -c input
	atf_check -o file:input cat xab
}
239
atf_test_case undocumented_kludge
# Check the bare-number way of giving the line count, with and without -d.
undocumented_kludge_body()
{
	seq 5000 >input

	# Each entry is "option:last-output-file".  With 1000 lines per
	# piece, the fifth and last piece (xae, or x04 with numeric suffixes)
	# is expected to hold lines 4001 through 5000.
	for spec in "-1000:xae" "-d1000:x04"; do
		atf_check split "${spec%%:*}" input
		atf_check -o "file:${spec#*:}" seq 4001 5000
	done
}
249
atf_test_case duplicate_linecount
# Check that giving the line count twice is rejected, whichever mix of the
# -N and -lN spellings is used.
duplicate_linecount_body()
{
	# All four combinations must fail with exit status 64; whatever is
	# printed on stderr is not inspected.
	for second in -5 -l5; do
		for first in -5 -l5; do
			atf_check -s exit:64 -e ignore \
			    split "${first}" "${second}" /dev/null
		done
	done
}
258
# Register the split(1) test cases, one atf_add_test_case call per name, in
# the order listed.
atf_init_test_cases()
{
	for tc in \
	    bytes \
	    chunks \
	    sensible_lines \
	    long_lines \
	    numeric_suffix \
	    larger_suffix_length \
	    pattern \
	    autoextend \
	    noautoextend \
	    reautoextend \
	    continue \
	    undocumented_kludge \
	    duplicate_linecount
	do
		atf_add_test_case "${tc}"
	done
}
275