xref: /freebsd/usr.bin/split/tests/split_test.sh (revision 7543a9c0280a0f4262489671936a6e03b9b2c563)
1#
2# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3#
4# Copyright (c) 2022 Klara Systems
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions
8# are met:
9# 1. Redistributions of source code must retain the above copyright
10#    notice, this list of conditions and the following disclaimer.
11# 2. Redistributions in binary form must reproduce the above copyright
12#    notice, this list of conditions and the following disclaimer in the
13#    documentation and/or other materials provided with the distribution.
14#
15# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25# SUCH DAMAGE.
26#
27# $FreeBSD$
28
# Mirror of MAXBSIZE from sys/param.h.  The tests below treat it as the
# default buffer size of split(1).  A value already present in the
# environment takes precedence over the fallback; the expansion is quoted so
# that such a value is never word-split or globbed.
: "${MAXBSIZE:=65536}"
31
atf_test_case bytes
# Check that `split -b` cuts its input at exact byte offsets, first with a
# tiny chunk size and then with a chunk size just past MAXBSIZE.
bytes_body()
{
	# Expected 4-byte pieces; the cuts deliberately ignore line boundaries.
	printf "aaaa" > foo-aa
	printf "bb\nc" > foo-ab
	printf "ccc\n" > foo-ac

	cat foo-* > foo
	atf_check split -b 4 foo split-
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac

	# Start the second scenario from a clean slate, the same way
	# sensible_lines does: a failed cleanup aborts the test, and no split-*
	# file left over from the first run can be mistaken for new output.
	atf_check rm foo-* foo
	atf_check rm split-*

	# MAXBSIZE is the default buffer size, so we'll split at just a little
	# bit past the buffer size to make sure that it still properly splits
	# even when it needs to read again to hit the limit.
	bsize=$((MAXBSIZE + 12))
	jot -ns "" -b "a" "${bsize}" > foo-aa
	jot -ns "" -b "b" "${bsize}" > foo-ab
	jot -ns "" -b "c" 12 > foo-ac

	cat foo-* > foo
	atf_check split -b "${bsize}" foo split-
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac
}
60
atf_test_case chunks
# Check `split -n`: a 12296-byte input split into three chunks must match the
# expected pieces built below, which are 4098, 4098 and 4100 bytes long.
chunks_body()
{
	chunks=3

	# Input: 4096 a's, 4096 b's and 4104 c's.
	{
		jot -ns "" -b "a" 4096
		jot -ns "" -b "b" 4096
		jot -ns "" -b "c" 4104
	} > foo

	# Expected pieces; the chunk boundaries do not line up with the points
	# where the input changes letter.
	{
		jot -ns "" -b "a" 4096
		jot -ns "" -b "b" 2
	} > foo-aa
	{
		jot -ns "" -b "b" 4094
		jot -ns "" -b "c" 4
	} > foo-ab
	jot -ns "" -b "c" 4100 > foo-ac

	atf_check split -n ${chunks} foo split-
	for piece in aa ab ac; do
		atf_check -o "file:foo-${piece}" cat "split-${piece}"
	done
}
80
atf_test_case sensible_lines
# Exercise `split -l` on a three-line file: one line per piece read from a
# named file, the same again read from stdin via `-`, then two lines per
# piece.
sensible_lines_body()
{
	printf '%s\n' "The quick brown fox" > foo-aa
	printf '%s\n' "jumps over" > foo-ab
	printf '%s\n' "the lazy dog" > foo-ac
	cat foo-* > foo

	# One line per output file.
	atf_check split -l 1 foo split-
	for piece in aa ab ac; do
		atf_check -o "file:foo-${piece}" cat "split-${piece}"
	done

	# Same split once more, with `-` standing for stdin as documented.
	atf_check rm split-*
	atf_check -x 'split -l 1 - split- < foo'
	for piece in aa ab ac; do
		atf_check -o "file:foo-${piece}" cat "split-${piece}"
	done

	# With -l 2 the three lines must land as a 2/1 split rather than the
	# 1/1/1 seen above.
	cat foo-aa foo-ab > foo-aa-ng
	cat foo-ac > foo-ab-ng

	atf_check rm split-*
	atf_check split -l 2 foo split-

	atf_check -o file:foo-aa-ng cat split-aa
	atf_check -o file:foo-ab-ng cat split-ab
}
112
atf_test_case long_lines
# Check that `split -l 1` keeps lines longer than MAXBSIZE intact.
long_lines_body()
{
	# The input consists of three lines:
	#   1. MAXBSIZE a's
	#   2. MAXBSIZE b's followed by MAXBSIZE c's
	#   3. 1024 d's
	#
	# The historical split(1) never grew its internal buffer, so with
	# -l 1 two of the three split- files came out wrong: split-aa held
	# most of the first two lines, split-ab a sliver of the second line,
	# and split-ac the third line.
	#
	# Recent split(1) keeps growing the buffer until the line fits or
	# memory runs out.
	jot -s "" -b "a" "${MAXBSIZE}" > foo-aa
	{
		# Only the second jot terminates the line, so the b's and c's
		# form a single line.
		jot -ns "" -b "b" "${MAXBSIZE}"
		jot -s "" -b "c" "${MAXBSIZE}"
	} > foo-ab
	jot -s "" -b "d" 1024 > foo-ac

	cat foo-* > foo
	atf_check split -l 1 foo split-

	for piece in aa ab ac; do
		atf_check -o "file:foo-${piece}" cat "split-${piece}"
	done
}
141
atf_test_case numeric_suffix
# Check that `split -d` names its output files with numeric suffixes
# (split-00, split-01, ...).
numeric_suffix_body()
{
	printf '%s\n' "The quick brown fox" > foo-00
	printf '%s\n' "jumps over" > foo-01
	printf '%s\n' "the lazy dog" > foo-02
	cat foo-* > foo

	atf_check split -d -l 1 foo split-

	for num in 00 01 02; do
		atf_check -o "file:foo-${num}" cat "split-${num}"
	done
}
156
atf_test_case larger_suffix_length
# Check `split -a 3 -d`: twelve one-line pieces must come out as split-000
# through split-011.
larger_suffix_length_body()
{
	# Two-digit, zero-padded indices 00..11, generated once and used for
	# both building the input and verifying the output.
	indices=$(seq -w 0 11)

	: > foo

	# Generate foo-000 through foo-009, then foo-010 and foo-011.  Piece N
	# is a single line of N + 1 a's; stripping one leading zero from the
	# index keeps the arithmetic from reading 08 and 09 as octal.
	for idx in ${indices}; do
		piece="foo-0${idx}"
		jot -s "" -b "a" $((${idx##0} + 1)) > "${piece}"
		cat "${piece}" >> foo
	done

	atf_check split -a 3 -d -l 1 foo split-

	for idx in ${indices}; do
		atf_check -o file:"foo-0${idx}" cat "split-0${idx}"
	done
}
177
atf_test_case pattern
# Check `split -p`: a new output file must start at every line matching the
# pattern.
pattern_body()
{
	# Some fake yaml makes a realistic use-case for -p: each top-level
	# stanza should end up in a file of its own.
	printf '%s\n' \
	    "cat:" \
	    "  aa: true" \
	    "  ab: true" \
	    "  ac: true" > foo-aa
	printf '%s\n' \
	    "dog:" \
	    "  ba: true" \
	    "  bb: true" \
	    "  bc: true" > foo-ab

	cat foo-* > foo

	# The pattern matches unindented "key:" lines, i.e. stanza headers.
	atf_check split -p "^[^[:space:]]+:" foo split-
	for piece in aa ab; do
		atf_check -o "file:foo-${piece}" cat "split-${piece}"
	done
}
203
# Register every test case defined in this file, in definition order.
atf_init_test_cases()
{
	# Walk the list through the function's own positional parameters so no
	# extra variable is left behind once registration is done.
	set -- bytes chunks sensible_lines long_lines numeric_suffix \
	    larger_suffix_length pattern
	while [ $# -gt 0 ]; do
		atf_add_test_case "$1"
		shift
	done
}
214