xref: /freebsd/lib/libfigpar/string_m.c (revision 041394f38a59889f0e14ace3306df5310cd5aeac)
/*-
 * Copyright (c) 2001-2014 Devin Teske <dteske@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26*041394f3SDevin Teske 
27*041394f3SDevin Teske #include <sys/cdefs.h>
28*041394f3SDevin Teske __FBSDID("$FreeBSD$");
29*041394f3SDevin Teske 
30*041394f3SDevin Teske #include <sys/types.h>
31*041394f3SDevin Teske 
32*041394f3SDevin Teske #include <ctype.h>
33*041394f3SDevin Teske #include <errno.h>
34*041394f3SDevin Teske #include <stdio.h>
35*041394f3SDevin Teske #include <stdlib.h>
36*041394f3SDevin Teske #include <string.h>
37*041394f3SDevin Teske 
38*041394f3SDevin Teske #include "string_m.h"
39*041394f3SDevin Teske 
/*
 * Count the non-overlapping occurrences of `find' within `source', scanning
 * from left to right (so "aaaa" contains "aa" twice, not three times).
 *
 * Returns the number of matches. Zero is returned when either argument is
 * NULL or when `find' is the empty string.
 *
 * Typical use: working out how large a buffer must be before a replaceall()
 * call that grows the string.
 */
unsigned int
strcount(const char *source, const char *find)
{
	const char *p;
	size_t flen;
	unsigned int n = 0;

	/* Both parameters are required */
	if (source == NULL || find == NULL)
		return (0);

	/* An empty needle would match everywhere; treat it as "no matches" */
	flen = strlen(find);
	if (flen == 0)
		return (0);

	/*
	 * Let strstr() locate each match and resume the search immediately
	 * after it, which is what keeps the counted matches from overlapping.
	 * An empty source simply yields no match.
	 */
	for (p = strstr(source, find); p != NULL; p = strstr(p + flen, find))
		n++;

	return (n);
}
74*041394f3SDevin Teske 
/*
 * Replace every occurrence of `find' in `source' with `replace', in place.
 *
 * `source' must point to writable memory (never a string constant) that is
 * large enough to hold the result. When `replace' is longer than `find' the
 * result grows by (strlen(replace) - strlen(find)) bytes per occurrence; use
 * strcount() to learn the number of occurrences and size the buffer before
 * calling. Writing into an undersized buffer is undefined behavior. A NULL
 * `replace' is treated as the empty string, i.e. every match is deleted.
 *
 * Returns the length (in bytes) of the resulting string. A NULL `source'
 * yields 0; a NULL or empty `find', or a `find' longer than `source', leaves
 * `source' untouched and yields its current length.
 *
 * On allocation failure -1 is returned, errno is set to ENOMEM and `source'
 * is left unmodified. errno is reset to 0 on entry.
 */
int
replaceall(char *source, const char *find, const char *replace)
{
	char *p;	/* write cursor into source */
	char *t;	/* read cursor into temp */
	char *temp;	/* pristine copy of source, or source itself */
	size_t flen;
	size_t rlen;
	size_t slen;
	size_t i;

	errno = 0; /* reset global error number */

	/* Check that we have non-null parameters */
	if (source == NULL)
		return (0);
	if (find == NULL)
		return ((int)strlen(source));

	/* Cache the length of the strings */
	slen = strlen(source);
	flen = strlen(find);
	rlen = (replace != NULL) ? strlen(replace) : 0;

	/* Cases where no replacements need to be made */
	if (slen == 0 || flen == 0 || slen < flen)
		return ((int)slen);

	/*
	 * When the replacement is longer than the match, the write cursor
	 * would overtake the read cursor and clobber unread input, so read
	 * from a private copy instead. Otherwise the write cursor can never
	 * pass the read cursor and rewriting in place is safe.
	 */
	if (rlen > flen) {
		temp = malloc(slen + 1);
		if (temp == NULL) {
			/* Test the pointer: errno alone is not a reliable signal */
			errno = ENOMEM;
			return (-1);
		}
		memcpy(temp, source, slen + 1);
	} else
		temp = source;

	/* Reconstruct the string with the replacements */
	p = source;
	t = temp;
	while (*t != '\0') {
		if (strncmp(t, find, flen) == 0) {
			/* found an occurrence */
			for (i = 0; i < rlen; i++)
				*p++ = replace[i];
			t += flen;
		} else
			*p++ = *t++; /* copy character and increment */
	}

	/* Terminate the string */
	*p = '\0';

	/* Free the temporary allocated memory */
	if (temp != source)
		free(temp);

	/* Return the length of the completed string */
	return ((int)(p - source));
}
152*041394f3SDevin Teske 
/*
 * Expand backslash escape sequences in the buffer pointed to by `source',
 * in place. A NULL `source' is ignored.
 *
 * Do not pass a string constant or literal: the buffer is modified and the
 * program will likely fault. The string can only shrink or keep its length;
 * the amount of memory allocated does not change.
 *
 * Interpreted sequences are:
 *
 * 	\0NNN	character with octal value NNN (0 to 3 digits after the 0)
 * 	\N	character with octal value N (1 thru 7)
 * 	\a	alert (BEL)
 * 	\b	backspace
 * 	\f	form feed
 * 	\n	new line
 * 	\r	carriage return
 * 	\t	horizontal tab
 * 	\v	vertical tab
 * 	\xNN	byte with hexadecimal value NN (1 to 2 digits)
 *
 * Any other sequence is unescaped, i.e. the backslash is dropped and the
 * following character kept (`\"' becomes `"', `\#' becomes `#'). A `\x' that
 * is not followed by a hex digit yields the character after the `x'. A
 * backslash (or `\x') that ends the string is dropped.
 *
 * Numeric escapes are reduced to a single byte; an escape that evaluates to
 * zero stores a NUL byte, which ends the string for any C-string consumer.
 */
void
strexpand(char *source)
{
	unsigned char c;
	char *chr;	/* read cursor */
	char *pos;	/* write cursor; never ahead of chr */
	char d[4];	/* digits of a numeric escape, NUL-terminated */
	int i;

	if (source == NULL)
		return;

	/* Initialize position elements */
	pos = chr = source;

	/*
	 * Loop until the READ cursor reaches the terminator. Testing the
	 * write cursor instead lets the read cursor run past the end of the
	 * string as soon as two or more characters have been collapsed.
	 */
	while (*chr != '\0') {
		if (*chr != '\\') {
			*pos++ = *chr++; /* copy character to current offset */
			continue;
		}

		/* A backslash that ends the string has nothing to escape */
		if (*++chr == '\0')
			break;

		/* Replace the backslash with the correct character */
		switch (*chr) {
		case 'a': *pos = '\a'; break; /* bell/alert (BEL) */
		case 'b': *pos = '\b'; break; /* backspace */
		case 'f': *pos = '\f'; break; /* form feed */
		case 'n': *pos = '\n'; break; /* new line */
		case 'r': *pos = '\r'; break; /* carriage return */
		case 't': *pos = '\t'; break; /* horizontal tab */
		case 'v': *pos = '\v'; break; /* vertical tab */
		case 'x': /* hex value (1 to 2 digits)(\xNN) */
			memset(d, 0, sizeof(d));

			/* collect up to two hex digits */
			for (i = 0; i < 2 &&
			    isxdigit((unsigned char)chr[1]); i++)
				d[i] = *++chr;

			if (i > 0) {
				c = (unsigned char)strtoul(d, NULL, 16);
				*pos = (char)c;
			} else if (chr[1] != '\0') {
				/* no digits: keep the char after the `x' */
				*pos = *++chr;
			} else {
				/* "\x" ends the string: the result ends here */
				*pos = '\0';
				return;
			}
			break;
		case '0': /* octal value (0 to 3 digits)(\0NNN) */
			memset(d, 0, sizeof(d));

			/* collect up to three octal digits */
			for (i = 0; i < 3 &&
			    chr[1] >= '0' && chr[1] <= '7'; i++)
				d[i] = *++chr;

			/* no digits at all converts to zero (a NUL byte) */
			c = (unsigned char)strtoul(d, NULL, 8);
			*pos = (char)c;
			break;
		default: /* single octal (\1..7) or unknown sequence */
			if (*chr >= '1' && *chr <= '7')
				*pos = (char)(*chr - '0');
			else
				*pos = *chr;
		}

		/* Increment to next offset, possible next escape sequence */
		pos++;
		chr++;
	}

	/* Re-terminate only if something was collapsed */
	if (pos != chr)
		*pos = '\0';
}
255*041394f3SDevin Teske 
/*
 * Expand only the escaped newlines in the buffer pointed to by `source', in
 * place. A NULL `source' is ignored.
 *
 * Each `n' that directly follows a run of one or more backslashes consumes
 * the last backslash of that run: if the run has an odd length the pair
 * becomes a literal newline, if it has an even length the pair becomes a
 * plain `n'. Hence "\n" turns into a newline and "\\n" turns into "\n".
 * Everything else, including other escape sequences, is copied unchanged.
 *
 * Do not pass a string constant or literal: the buffer is modified and the
 * program will likely fault. The string can only shrink or keep its length;
 * the amount of memory allocated does not change.
 */
void
strexpandnl(char *source)
{
	size_t backslash = 0;	/* length of the backslash run just read */
	char *cp1;		/* write cursor */
	char *cp2;		/* read cursor */

	if (source == NULL)
		return;

	cp1 = cp2 = source;
	while (*cp2 != '\0') {
		*cp1 = *cp2;
		if (*cp2 == '\\')
			backslash++;
		else if (*cp2 != 'n')
			backslash = 0;
		else if (backslash > 0) {
			/*
			 * Step back onto the backslash just written and
			 * overwrite it; only the parity of the run matters,
			 * and a size_t counter cannot wrap back to zero on
			 * long runs the way an 8-bit one does.
			 */
			*(--cp1) = (backslash & 1) == 1 ? '\n' : 'n';
			backslash = 0;
		}
		cp1++;
		cp2++;
	}

	/* Terminate the (possibly shortened) string */
	*cp1 = '\0';
}
291*041394f3SDevin Teske 
/*
 * Convert a NUL-terminated string to lower case, in place, using tolower().
 * A NULL `source' is ignored.
 *
 * Do not pass a string constant to this function; only pass pointers to
 * writable memory holding NUL-terminated string data.
 */
void
strtolower(char *source)
{
	char *p;

	if (source == NULL)
		return;

	/*
	 * tolower() is only defined for EOF and values representable as
	 * unsigned char, so a plain (possibly negative) char must be
	 * converted before the call.
	 */
	for (p = source; *p != '\0'; p++)
		*p = (char)tolower((unsigned char)*p);
}
310