1 /**
2 * \file cook.c
3 *
4 * This file contains the routines that deal with processing quoted strings
5 * into an internal format.
6 *
7 * @addtogroup autoopts
8 * @{
9 */
10 /*
11 * This file is part of AutoOpts, a companion to AutoGen.
12 * AutoOpts is free software.
13 * AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
14 *
15 * AutoOpts is available under any one of two licenses. The license
16 * in use must be one of these two and the choice is under the control
17 * of the user of the license.
18 *
19 * The GNU Lesser General Public License, version 3 or later
20 * See the files "COPYING.lgplv3" and "COPYING.gplv3"
21 *
22 * The Modified Berkeley Software Distribution License
23 * See the file "COPYING.mbsd"
24 *
25 * These files have the following sha256 sums:
26 *
27 * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
28 * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
29 * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
30 */
31
32 /*=export_func ao_string_cook_escape_char
33 * private:
34 *
35 * what: escape-process a string fragment
36 * arg: + char const * + pzScan + points to character after the escape +
37 * arg: + char * + pRes + Where to put the result byte +
38 * arg: + unsigned int + nl_ch + replacement char if scanned char is \n +
39 *
40 * ret-type: unsigned int
41 * ret-desc: The number of bytes consumed processing the escaped character.
42 *
43 * doc:
44 *
45 * This function converts "t" into "\t" and all your other favorite
 *  escapes, including numeric ones: hex and octal, too.
47 * The returned result tells the caller how far to advance the
48 * scan pointer (passed in). The default is to just pass through the
49 * escaped character and advance the scan by one.
50 *
51 * Some applications need to keep an escaped newline, others need to
52 * suppress it. This is accomplished by supplying a '\n' replacement
53 * character that is different from \n, if need be. For example, use
54 * 0x7F and never emit a 0x7F.
55 *
 * err:  zero is returned if the input ends right after the escape character.
57 =*/
unsigned int
ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
{
    /*
     *  Translate one escape sequence.  pzIn addresses the character that
     *  follows the backslash.  Exactly one result byte is stored through
     *  pRes.  The return value is the number of input bytes used up,
     *  or zero when the input ends right where the escaped character
     *  should have been.
     */
    char const         esc  = *pzIn;     /* the escaped character  */
    char const * const tail = pzIn + 1;  /* the text that follows  */

    /*
     *  Unless a case below decides otherwise, the escaped character
     *  stands for itself and one byte of input is consumed.
     */
    *pRes = esc;

    switch (esc) {
    case NUL:
        /* nothing at all follows the backslash */
        return 0;

    case NL:
        /* escaped newline: the caller chooses what gets emitted */
        *pRes = (char)nl;
        return 1;

    case '\r':
        /* a lone CR is passed through; a CR-NL pair counts as a newline */
        if (*tail != NL)
            return 1;
        *pRes = (char)nl;
        return 2;

    case 'a': *pRes = '\a'; break;
    case 'b': *pRes = '\b'; break;
    case 'f': *pRes = '\f'; break;
    case 'n': *pRes = NL;   break;
    case 'r': *pRes = '\r'; break;
    case 't': *pRes = '\t'; break;
    case 'v': *pRes = '\v'; break;

    case 'x':
    case 'X':
    {
        /*
         *  Hex escape: one or two hex digits.  With no digit at all,
         *  the 'x' itself is the result.
         */
        char         hex[3];
        unsigned int len = 0;

        while ((len < 2) && IS_HEX_DIGIT_CHAR(tail[len])) {
            hex[len] = tail[len];
            len++;
        }
        if (len == 0)
            break;

        hex[len] = NUL;
        *pRes = (char)strtoul(hex, NULL, 16);
        return len + 1;     /* the digits plus the 'x' */
    }

    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
    {
        /*
         *  Octal escape: the digit already seen plus up to two more.
         *  Three octal digits can exceed one byte, so clamp at 0xFF.
         */
        char          oct[4];
        unsigned int  len = 1;
        unsigned long val;

        oct[0] = esc;
        while ((len < 3) && IS_OCT_DIGIT_CHAR(pzIn[len])) {
            oct[len] = pzIn[len];
            len++;
        }
        oct[len] = NUL;

        val   = strtoul(oct, NULL, 8);
        *pRes = (char)((val > 0xFF) ? 0xFF : val);
        return len;
    }

    default:
        /* the quoted character is the result character */
        break;
    }

    return 1;
}
132
/**
 * Count the newlines in the half-open range [start, end).
 *
 * @param[in]     start   first character to examine
 * @param[in]     end     one past the last character to examine.  A NULL
 *                        value (a failed search for the end) is accepted
 *                        and handed straight back.
 * @param[in,out] lnct_p  line counter, incremented once per NL found
 *
 * @returns @a end
 */
static char *
nl_count(char * start, char * end, int * lnct_p)
{
    /*
     * An ordering comparison against a null pointer is undefined, so
     * the "no end found" case is filtered out before scanning.
     */
    if (end == NULL)
        return NULL;

    for (; start < end; start++) {
        if (*start == NL)
            (*lnct_p)++;
    }
    return end;
}

/**
 * A closing quote has been found.  Look past it, skipping white space and
 * C or C++ style comments, to see whether another quoted string follows
 * that should be concatenated with the current one.
 *
 * @param[in,out] pps     on entry, points at the closing quote character.
 *                        On a @c true return it points just past the
 *                        opening quote of the next string.  On a @c false
 *                        return it points at the first character that is
 *                        neither white space nor comment, or is set to
 *                        NULL when the text is malformed.
 * @param[out]    pq      receives the quote character of the next string;
 *                        written only when @c true is returned
 * @param[in,out] lnct_p  line counter, incremented for every newline
 *                        that gets skipped
 *
 * @returns @c true when another quoted string follows, @c false otherwise
 */
static bool
contiguous_quote(char ** pps, char * pq, int * lnct_p)
{
    char * ps = *pps + 1;   /* step over the closing quote */

    for (;;) {
        while (IS_WHITESPACE_CHAR(*ps)) {
            if (*(ps++) == NL)
                (*lnct_p)++;
        }

        switch (*ps) {
        case '"':
        case '\'':
            /*
             * Another quoted string: hand back its quote character and
             * resume scanning with the first character inside it.
             */
            *pq  = *(ps++);
            *pps = ps;
            return true;

        case '/':
            /*
             * Allow for a comment embedded between the strings.
             * A slash that starts no comment is an error.
             */
            switch (ps[1]) {
            case '/':
                /*
                 * Skip to the end of the line.  The newline itself is
                 * left for the white space loop, which counts it.
                 */
                ps = strchr(ps, NL);
                if (ps == NULL)
                    goto fail_return;
                break;

            case '*':
            {
                /*
                 * Find the comment terminator first, so that a missing
                 * one is reported without ever comparing against NULL.
                 */
                char * cmt_end = strstr(ps + 2, "*/");
                if (cmt_end == NULL)
                    goto fail_return;

                ps = nl_count(ps + 2, cmt_end, lnct_p) + 2;
                break;
            }

            default:
                goto fail_return;
            }
            continue;

        default:
            /*
             * The next non-whitespace character is not a quote.
             * The series of quoted strings has come to an end.
             */
            *pps = ps;
            return false;
        }
    }

 fail_return:
    *pps = NULL;
    return false;
}
211
212 /*=export_func ao_string_cook
213 * private:
214 *
215 * what: concatenate and escape-process strings
216 * arg: + char * + pzScan + The *MODIFIABLE* input buffer +
217 * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count +
218 *
219 * ret-type: char *
220 * ret-desc: The address of the text following the processed strings.
221 * The return value is NULL if the strings are ill-formed.
222 *
223 * doc:
224 *
225 * A series of one or more quoted strings are concatenated together.
226 * If they are quoted with double quotes (@code{"}), then backslash
227 * escapes are processed per the C programming language. If they are
228 * single quote strings, then the backslashes are honored only when they
229 * precede another backslash or a single quote character.
230 *
231 * err: @code{NULL} is returned if the string(s) is/are mal-formed.
232 =*/
233 char *
ao_string_cook(char * pzScan,int * lnct_p)234 ao_string_cook(char * pzScan, int * lnct_p)
235 {
236 int l = 0;
237 char q = *pzScan;
238
239 /*
240 * It is a quoted string. Process the escape sequence characters
241 * (in the set "abfnrtv") and make sure we find a closing quote.
242 */
243 char * pzD = pzScan++;
244 char * pzS = pzScan;
245
246 if (lnct_p == NULL)
247 lnct_p = &l;
248
249 for (;;) {
250 /*
251 * IF the next character is the quote character, THEN we may end the
252 * string. We end it unless the next non-blank character *after* the
253 * string happens to also be a quote. If it is, then we will change
254 * our quote character to the new quote character and continue
255 * condensing text.
256 */
257 while (*pzS == q) {
258 *pzD = NUL; /* This is probably the end of the line */
259 if (! contiguous_quote(&pzS, &q, lnct_p))
260 return pzS;
261 }
262
263 /*
264 * We are inside a quoted string. Copy text.
265 */
266 switch (*(pzD++) = *(pzS++)) {
267 case NUL:
268 return NULL;
269
270 case NL:
271 (*lnct_p)++;
272 break;
273
274 case '\\':
275 /*
276 * IF we are escaping a new line,
277 * THEN drop both the escape and the newline from
278 * the result string.
279 */
280 if (*pzS == NL) {
281 pzS++;
282 pzD--;
283 (*lnct_p)++;
284 }
285
286 /*
287 * ELSE IF the quote character is '"' or '`',
288 * THEN we do the full escape character processing
289 */
290 else if (q != '\'') {
291 unsigned int ct;
292 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
293 if (ct == 0)
294 return NULL;
295
296 pzS += ct;
297 } /* if (q != '\'') */
298
299 /*
300 * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
301 * The latter only to easily hide preprocessing directives.
302 */
303 else switch (*pzS) {
304 case '\\':
305 case '\'':
306 case '#':
307 pzD[-1] = *pzS++;
308 }
309 } /* switch (*(pzD++) = *(pzS++)) */
310 } /* for (;;) */
311 }
312
313 /** @}
314 *
315 * Local Variables:
316 * mode: C
317 * c-file-style: "stroustrup"
318 * indent-tabs-mode: nil
319 * End:
320 * end of autoopts/cook.c */
321