1 /** 2 * \file cook.c 3 * 4 * This file contains the routines that deal with processing quoted strings 5 * into an internal format. 6 * 7 * @addtogroup autoopts 8 * @{ 9 */ 10 /* 11 * This file is part of AutoOpts, a companion to AutoGen. 12 * AutoOpts is free software. 13 * AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved 14 * 15 * AutoOpts is available under any one of two licenses. The license 16 * in use must be one of these two and the choice is under the control 17 * of the user of the license. 18 * 19 * The GNU Lesser General Public License, version 3 or later 20 * See the files "COPYING.lgplv3" and "COPYING.gplv3" 21 * 22 * The Modified Berkeley Software Distribution License 23 * See the file "COPYING.mbsd" 24 * 25 * These files have the following sha256 sums: 26 * 27 * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3 28 * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3 29 * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd 30 */ 31 32 /*=export_func ao_string_cook_escape_char 33 * private: 34 * 35 * what: escape-process a string fragment 36 * arg: + char const * + pzScan + points to character after the escape + 37 * arg: + char * + pRes + Where to put the result byte + 38 * arg: + unsigned int + nl_ch + replacement char if scanned char is \n + 39 * 40 * ret-type: unsigned int 41 * ret-desc: The number of bytes consumed processing the escaped character. 42 * 43 * doc: 44 * 45 * This function converts "t" into "\t" and all your other favorite 46 * escapes, including numeric ones: hex and ocatal, too. 47 * The returned result tells the caller how far to advance the 48 * scan pointer (passed in). The default is to just pass through the 49 * escaped character and advance the scan by one. 50 * 51 * Some applications need to keep an escaped newline, others need to 52 * suppress it. This is accomplished by supplying a '\n' replacement 53 * character that is different from \n, if need be. For example, use 54 * 0x7F and never emit a 0x7F. 55 * 56 * err: @code{NULL} is returned if the string is mal-formed. 57 =*/ 58 unsigned int 59 ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl) 60 { 61 unsigned int res = 1; 62 63 switch (*pRes = *pzIn++) { 64 case NUL: /* NUL - end of input string */ 65 return 0; 66 case '\r': 67 if (*pzIn != NL) 68 return 1; 69 res++; 70 /* FALLTHROUGH */ 71 case NL: /* NL - emit newline */ 72 *pRes = (char)nl; 73 return res; 74 75 case 'a': *pRes = '\a'; break; 76 case 'b': *pRes = '\b'; break; 77 case 'f': *pRes = '\f'; break; 78 case 'n': *pRes = NL; break; 79 case 'r': *pRes = '\r'; break; 80 case 't': *pRes = '\t'; break; 81 case 'v': *pRes = '\v'; break; 82 83 case 'x': 84 case 'X': /* HEX Escape */ 85 if (IS_HEX_DIGIT_CHAR(*pzIn)) { 86 char z[4]; 87 unsigned int ct = 0; 88 89 do { 90 z[ct] = pzIn[ct]; 91 if (++ct >= 2) 92 break; 93 } while (IS_HEX_DIGIT_CHAR(pzIn[ct])); 94 z[ct] = NUL; 95 *pRes = (char)strtoul(z, NULL, 16); 96 return ct + 1; 97 } 98 break; 99 100 case '0': case '1': case '2': case '3': 101 case '4': case '5': case '6': case '7': 102 { 103 /* 104 * IF the character copied was an octal digit, 105 * THEN set the output character to an octal value. 106 * The 3 octal digit result might exceed 0xFF, so check it. 107 */ 108 char z[4]; 109 unsigned long val; 110 unsigned int ct = 0; 111 112 z[ct++] = *--pzIn; 113 while (IS_OCT_DIGIT_CHAR(pzIn[ct])) { 114 z[ct] = pzIn[ct]; 115 if (++ct >= 3) 116 break; 117 } 118 119 z[ct] = NUL; 120 val = strtoul(z, NULL, 8); 121 if (val > 0xFF) 122 val = 0xFF; 123 *pRes = (char)val; 124 return ct; 125 } 126 127 default: /* quoted character is result character */; 128 } 129 130 return res; 131 } 132 133 /** 134 * count newlines between start and end 135 */ 136 static char * 137 nl_count(char * start, char * end, int * lnct_p) 138 { 139 while (start < end) { 140 if (*(start++) == NL) 141 (*lnct_p)++; 142 } 143 return end; 144 } 145 146 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 147 * 148 * A quoted string has been found. 149 * Find the end of it and compress any escape sequences. 150 */ 151 static bool 152 contiguous_quote(char ** pps, char * pq, int * lnct_p) 153 { 154 char * ps = *pps + 1; 155 156 for (;;) { 157 while (IS_WHITESPACE_CHAR(*ps)) 158 if (*(ps++) == NL) 159 (*lnct_p)++; 160 161 /* 162 * IF the next character is a quote character, 163 * THEN we will concatenate the strings. 164 */ 165 switch (*ps) { 166 case '"': 167 case '\'': 168 *pq = *(ps++); /* assign new quote character and return */ 169 *pps = ps; 170 return true; 171 172 case '/': 173 /* 174 * Allow for a comment embedded in the concatenated string. 175 */ 176 switch (ps[1]) { 177 default: 178 goto fail_return; 179 180 case '/': 181 /* 182 * Skip to end of line 183 */ 184 ps = strchr(ps, NL); 185 if (ps == NULL) 186 goto fail_return; 187 break; 188 189 case '*': 190 ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p); 191 if (ps == NULL) 192 goto fail_return; 193 ps += 2; 194 } 195 continue; 196 197 default: 198 /* 199 * The next non-whitespace character is not a quote. 200 * The series of quoted strings has come to an end. 201 */ 202 *pps = ps; 203 return false; 204 } 205 } 206 207 fail_return: 208 *pps = NULL; 209 return false; 210 } 211 212 /*=export_func ao_string_cook 213 * private: 214 * 215 * what: concatenate and escape-process strings 216 * arg: + char * + pzScan + The *MODIFIABLE* input buffer + 217 * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count + 218 * 219 * ret-type: char * 220 * ret-desc: The address of the text following the processed strings. 221 * The return value is NULL if the strings are ill-formed. 222 * 223 * doc: 224 * 225 * A series of one or more quoted strings are concatenated together. 226 * If they are quoted with double quotes (@code{"}), then backslash 227 * escapes are processed per the C programming language. If they are 228 * single quote strings, then the backslashes are honored only when they 229 * precede another backslash or a single quote character. 230 * 231 * err: @code{NULL} is returned if the string(s) is/are mal-formed. 232 =*/ 233 char * 234 ao_string_cook(char * pzScan, int * lnct_p) 235 { 236 int l = 0; 237 char q = *pzScan; 238 239 /* 240 * It is a quoted string. Process the escape sequence characters 241 * (in the set "abfnrtv") and make sure we find a closing quote. 242 */ 243 char * pzD = pzScan++; 244 char * pzS = pzScan; 245 246 if (lnct_p == NULL) 247 lnct_p = &l; 248 249 for (;;) { 250 /* 251 * IF the next character is the quote character, THEN we may end the 252 * string. We end it unless the next non-blank character *after* the 253 * string happens to also be a quote. If it is, then we will change 254 * our quote character to the new quote character and continue 255 * condensing text. 256 */ 257 while (*pzS == q) { 258 *pzD = NUL; /* This is probably the end of the line */ 259 if (! contiguous_quote(&pzS, &q, lnct_p)) 260 return pzS; 261 } 262 263 /* 264 * We are inside a quoted string. Copy text. 265 */ 266 switch (*(pzD++) = *(pzS++)) { 267 case NUL: 268 return NULL; 269 270 case NL: 271 (*lnct_p)++; 272 break; 273 274 case '\\': 275 /* 276 * IF we are escaping a new line, 277 * THEN drop both the escape and the newline from 278 * the result string. 279 */ 280 if (*pzS == NL) { 281 pzS++; 282 pzD--; 283 (*lnct_p)++; 284 } 285 286 /* 287 * ELSE IF the quote character is '"' or '`', 288 * THEN we do the full escape character processing 289 */ 290 else if (q != '\'') { 291 unsigned int ct; 292 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL); 293 if (ct == 0) 294 return NULL; 295 296 pzS += ct; 297 } /* if (q != '\'') */ 298 299 /* 300 * OTHERWISE, we only process "\\", "\'" and "\#" sequences. 301 * The latter only to easily hide preprocessing directives. 302 */ 303 else switch (*pzS) { 304 case '\\': 305 case '\'': 306 case '#': 307 pzD[-1] = *pzS++; 308 } 309 } /* switch (*(pzD++) = *(pzS++)) */ 310 } /* for (;;) */ 311 } 312 313 /** @} 314 * 315 * Local Variables: 316 * mode: C 317 * c-file-style: "stroustrup" 318 * indent-tabs-mode: nil 319 * End: 320 * end of autoopts/cook.c */ 321