xref: /illumos-gate/usr/src/contrib/ast/src/cmd/ksh93/sh/string.c (revision b30d193948be5a7794d7ae3ba0ed9c2f72c88e0f)
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1982-2012 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                  David Korn <dgk@research.att.com>                   *
*                                                                      *
***********************************************************************/
20 #pragma prototyped
/*
 * string processing routines for Korn shell
 *
 */
25 
26 #include	<ast.h>
27 #include	<ast_wchar.h>
28 #include	"defs.h"
29 #include	<stak.h>
30 #include	<ccode.h>
31 #include	"shtable.h"
32 #include	"lexstates.h"
33 #include	"national.h"
34 
35 #if _hdr_wctype
36 #   include <wctype.h>
37 #endif
38 
39 #if !_lib_iswprint && !defined(iswprint)
40 #   define iswprint(c)		(((c)&~0377) || isprint(c))
41 #endif
42 
43 
44 /*
45  *  Table lookup routine
46  *  <table> is searched for string <sp> and corresponding value is returned
47  *  This is only used for small tables and is used to save non-sharable memory
48  */
49 
sh_locate(register const char * sp,const Shtable_t * table,int size)50 const Shtable_t *sh_locate(register const char *sp,const Shtable_t *table,int size)
51 {
52 	register int			first;
53 	register const Shtable_t	*tp;
54 	register int			c;
55 	static const Shtable_t		empty = {0,0};
56 	if(sp==0 || (first= *sp)==0)
57 		return(&empty);
58 	tp=table;
59 	while((c= *tp->sh_name) && (CC_NATIVE!=CC_ASCII || c <= first))
60 	{
61 		if(first == c && strcmp(sp,tp->sh_name)==0)
62 			return(tp);
63 		tp = (Shtable_t*)((char*)tp+size);
64 	}
65 	return(&empty);
66 }
67 
68 /*
69  *  shtab_options lookup routine
70  */
71 
72 #define sep(c)		((c)=='-'||(c)=='_')
73 
sh_lookopt(register const char * sp,int * invert)74 int sh_lookopt(register const char *sp, int *invert)
75 {
76 	register int			first;
77 	register const Shtable_t	*tp;
78 	register int			c;
79 	register const char		*s, *t, *sw, *tw;
80 	int				amb;
81 	int				hit;
82 	int				inv;
83 	int				no;
84 	if(sp==0)
85 		return(0);
86 	if(*sp=='n' && *(sp+1)=='o' && (*(sp+2)!='t' || *(sp+3)!='i'))
87 	{
88 		sp+=2;
89 		if(sep(*sp))
90 			sp++;
91 		*invert = !*invert;
92 	}
93 	if((first= *sp)==0)
94 		return(0);
95 	tp=shtab_options;
96 	amb=hit=0;
97 	for(;;)
98 	{
99 		t=tp->sh_name;
100 		if(no = *t=='n' && *(t+1)=='o' && *(t+2)!='t')
101 			t+=2;
102 		if(!(c= *t))
103 			break;
104 		if(first == c)
105 		{
106 			if(strcmp(sp,t)==0)
107 			{
108 				*invert ^= no;
109 				return(tp->sh_number);
110 			}
111 			s=sw=sp;
112 			tw=t;
113 			for(;;)
114 			{
115 				if(!*s || *s=='=')
116 				{
117 					if (*s == '=' && !strtol(s+1, NiL, 0))
118 						no = !no;
119 					if (!*t)
120 					{
121 						*invert ^= no;
122 						return(tp->sh_number);
123 					}
124 					if (hit || amb)
125 					{
126 						hit = 0;
127 						amb = 1;
128 					}
129 					else
130 					{
131 						hit = tp->sh_number;
132 						inv = no;
133 					}
134 					break;
135 				}
136 				else if(!*t)
137 					break;
138 				else if(sep(*s))
139 					sw = ++s;
140 				else if(sep(*t))
141 					tw = ++t;
142 				else if(*s==*t)
143 				{
144 					s++;
145 					t++;
146 				}
147 				else if(s==sw && t==tw)
148 					break;
149 				else
150 				{
151 					if(t!=tw)
152 					{
153 						while(*t && !sep(*t))
154 							t++;
155 						if(!*t)
156 							break;
157 						tw = ++t;
158 					}
159 					while (s>sw && *s!=*t)
160 						s--;
161 				}
162 			}
163 		}
164 		tp = (Shtable_t*)((char*)tp+sizeof(*shtab_options));
165 	}
166 	if(hit)
167 		*invert ^= inv;
168 	return(hit);
169 }
170 
/*
 * look for the substring <oldsp> in <string> and replace with <newsp>
 * Only the first occurrence is replaced.
 * The stack is reset with stakseek(0) and the new string is built on top
 * of it; on success the frozen stack string is returned.
 * Returns a null pointer when <string> is empty or <oldsp> does not occur.
 * An empty <oldsp> matches at the start, so <newsp> is prepended.
 */
char *sh_substitute(const char *string,const char *oldsp,char *newsp)
/*@
	assume string!=NULL && oldsp!=NULL && newsp!=NULL;
	return x satisfying x==NULL ||
		strlen(x)==(strlen(in string)+strlen(in newsp)-strlen(in oldsp));
@*/
{
	register const char *sp = string;
	register const char *cp;
	const char *savesp = 0;
	stakseek(0);
	if(*sp==0)
		return((char*)0);
	if(*(cp=oldsp) == 0)
		goto found;
#if SHOPT_MULTIBYTE
	mbinit();
#endif /* SHOPT_MULTIBYTE */
	do
	{
	/* skip to first character which matches start of oldsp */
	/* savesp==sp forces progress past a position that already failed */
		while(*sp && (savesp==sp || *sp != *cp))
		{
#if SHOPT_MULTIBYTE
			/* copy a whole character at a time */
			int c = mbsize(sp);
			/*
			 * A byte that does not start a valid character is
			 * copied through unchanged as a single byte instead
			 * of being dropped from the result.
			 */
			if(c < 1)
				c = 1;
			while(c-- > 0)
#endif /* SHOPT_MULTIBYTE */
			stakputc(*sp++);
		}
		if(*sp == 0)
			return((char*)0);
		savesp = sp;
		for(;*cp;cp++)
		{
			if(*cp != *sp++)
				break;
		}
		if(*cp==0)
		/* match found */
			goto found;
		/* partial match only: rewind and retry one character later */
		sp = savesp;
		cp = oldsp;
	}
	while(*sp);
	return((char*)0);

found:
	/* copy new */
	stakputs(newsp);
	/* copy rest of string */
	stakputs(sp);
	return(stakfreeze(1));
}
231 
/*
 * TRIM(sp)
 * Remove escape characters from characters in <sp> and eliminate quoted nulls.
 * The string is rewritten in place: each backslash is dropped and the
 * character following it is kept literally.  A backslash that is the last
 * character of the string is dropped and ends the scan.
 * A null <sp> is accepted and ignored.
 */

void	sh_trim(register char *sp)
/*@
	assume sp!=NULL;
	promise  strlen(in sp) <= in strlen(sp);
@*/
{
	register char *dp;
	register int c;
	if(sp)
	{
		dp = sp;
		while((c= *sp))
		{
#if SHOPT_MULTIBYTE
			int len;
			/* multibyte characters are copied whole and never unescaped */
			if(mbwide() && (len=mbsize(sp))>1)
			{
				memmove(dp, sp, len);
				dp += len;
				sp += len;
				continue;
			}
#endif /* SHOPT_MULTIBYTE */
			sp++;
			if(c == '\\')
			{
				/*
				 * Take the escaped character, but never step
				 * over the terminating null: a trailing backslash
				 * must not make the loop read past the string.
				 */
				if((c = *sp))
					sp++;
			}
			if(c)
				*dp++ = c;
		}
		*dp = 0;
	}
}
269 
/*
 * copy <str1> to <str2> changing upper case to lower case
 * <str2> must be big enough to hold <str1>, including the terminating null
 * <str1> and <str2> may point to the same place.
 */

void sh_utol(register char const *str1,register char *str2)
/*@
	assume str1!=0 && str2!=0
	return x satisfying strlen(in str1)==strlen(in str2);
@*/
{
	register int c;
	/* read through unsigned char so isupper()/tolower() get a valid argument */
	for(; (c= *((const unsigned char*)str1)) != 0; str1++,str2++)
	{
		if(isupper(c))
			*str2 = tolower(c);
		else
			*str2 = c;
	}
	*str2 = 0;
}
292 
/*
 * format string as a csv field
 *
 * A null <string> yields a null pointer.  A string for which every
 * character satisfies isaname() is returned unchanged (the argument
 * itself, nothing is written to the stack).  Otherwise the string is
 * written to the stack enclosed in double quotes with every embedded
 * double quote doubled, null terminated, and a pointer to the start of
 * the text on the stack is returned.
 */
static char	*sh_fmtcsv(const char *string)
{
	register const char *cp = string;
	register int c;
	int offset;
	if(!cp)
		return((char*)0);
	offset = staktell();
	/* find the first character that forces quoting */
	while((c=mbchar(cp)),isaname(c));
	if(c==0)
		return((char*)string);
	stakputc('"');
	/* cp is already past the offending character, so it is included here */
	stakwrite(string,cp-string);
	if(c=='"')
		stakputc('"');
	string = cp;
	while((c=mbchar(cp)))
	{
		if(c=='"')
		{
			/* flush up to and including the quote, then double it */
			stakwrite(string,cp-string);
			string = cp;
			stakputc('"');
		}
	}
	/* cp is one past the terminating null; back up and flush the tail */
	if(--cp>string)
		stakwrite(string,cp-string);
	stakputc('"');
	stakputc(0);
	return(stakptr(offset));
}
327 
328 /*
329  * print <str> quoting chars so that it can be read by the shell
330  * puts null terminated result on stack, but doesn't freeze it
331  */
sh_fmtq(const char * string)332 char	*sh_fmtq(const char *string)
333 {
334 	register const char *cp = string, *op;
335 	register int c, state;
336 	int offset;
337 	if(!cp)
338 		return((char*)0);
339 #if SHOPT_MULTIBYTE
340 	mbinit();
341 #endif
342 	offset = staktell();
343 	state = ((c= mbchar(cp))==0);
344 	if(isaletter(c))
345 	{
346 		while((c=mbchar(cp)),isaname(c));
347 		if(c==0)
348 			return((char*)string);
349 		if(c=='=')
350 		{
351 			if(*cp==0)
352 				return((char*)string);
353 			if(*cp=='=')
354 				cp++;
355 			c = cp - string;
356 			stakwrite(string,c);
357 			string = cp;
358 			c = mbchar(cp);
359 		}
360 	}
361 	if(c==0 || c=='#' || c=='~')
362 		state = 1;
363 	for(;c;c= mbchar(cp))
364 	{
365 #if SHOPT_MULTIBYTE
366 		if(c=='\'' || c>=128 || c<0 || !iswprint(c))
367 #else
368 		if(c=='\'' || !isprint(c))
369 #endif /* SHOPT_MULTIBYTE */
370 			state = 2;
371 		else if(c==']' || c=='=' || (c!=':' && c<=0x7f && (c=sh_lexstates[ST_NORM][c]) && c!=S_EPAT))
372 			state |=1;
373 	}
374 	if(state<2)
375 	{
376 		if(state==1)
377 			stakputc('\'');
378 		if(c = --cp - string)
379 			stakwrite(string,c);
380 		if(state==1)
381 			stakputc('\'');
382 	}
383 	else
384 	{
385 		int isbyte=0;
386 		stakwrite("$'",2);
387 		cp = string;
388 #if SHOPT_MULTIBYTE
389 		while(op = cp, c= mbchar(cp))
390 #else
391 		while(op = cp, c= *(unsigned char*)cp++)
392 #endif
393 		{
394 			state=1;
395 			switch(c)
396 			{
397 			    case ('a'==97?'\033':39):
398 				c = 'E';
399 				break;
400 			    case '\n':
401 				c = 'n';
402 				break;
403 			    case '\r':
404 				c = 'r';
405 				break;
406 			    case '\t':
407 				c = 't';
408 				break;
409 			    case '\f':
410 				c = 'f';
411 				break;
412 			    case '\b':
413 				c = 'b';
414 				break;
415 			    case '\a':
416 				c = 'a';
417 				break;
418 			    case '\\':	case '\'':
419 				break;
420 			    default:
421 #if SHOPT_MULTIBYTE
422 				isbyte = 0;
423 				if(c<0)
424 				{
425 					c = *((unsigned char *)op);
426 					cp = op+1;
427 					isbyte = 1;
428 				}
429 				if(mbwide() && ((cp-op)>1))
430 				{
431 					sfprintf(staksp,"\\u[%x]",c);
432 					continue;
433 				}
434 				else if(!iswprint(c) || isbyte)
435 #else
436 				if(!isprint(c))
437 #endif
438 				{
439 					sfprintf(staksp,"\\x%.2x",c);
440 					continue;
441 				}
442 				state=0;
443 				break;
444 			}
445 			if(state)
446 			{
447 				stakputc('\\');
448 				stakputc(c);
449 			}
450 			else
451 				stakwrite(op, cp-op);
452 		}
453 		stakputc('\'');
454 	}
455 	stakputc(0);
456 	return(stakptr(offset));
457 }
458 
459 /*
460  * print <str> quoting chars so that it can be read by the shell
461  * puts null terminated result on stack, but doesn't freeze it
462  * single!=0 limits quoting to '...'
463  * fold>0 prints raw newlines and inserts appropriately
464  * escaped newlines every (fold-x) chars
465  */
sh_fmtqf(const char * string,int single,int fold)466 char	*sh_fmtqf(const char *string, int single, int fold)
467 {
468 	register const char *cp = string;
469 	register const char *bp;
470 	register const char *vp;
471 	register int c;
472 	register int n;
473 	register int q;
474 	register int a;
475 	int offset;
476 
477 	if (--fold < 8)
478 		fold = 0;
479 	if(single)
480 		return sh_fmtcsv(cp);
481 	if (!cp || !*cp || !fold || fold && strlen(string) < fold)
482 		return sh_fmtq(cp);
483 	offset = staktell();
484 	single = single ? 1 : 3;
485 	c = mbchar(string);
486 	a = isaletter(c) ? '=' : 0;
487 	vp = cp + 1;
488 	do
489 	{
490 		q = 0;
491 		n = fold;
492 		bp = cp;
493 		while ((!n || n-- > 0) && (c = mbchar(cp)))
494 		{
495 			if (a && !isaname(c))
496 				a = 0;
497 #if SHOPT_MULTIBYTE
498 			if (c >= 0x200)
499 				continue;
500 			if (c == '\'' || !iswprint(c))
501 #else
502 			if (c == '\'' || !isprint(c))
503 #endif /* SHOPT_MULTIBYTE */
504 			{
505 				q = single;
506 				break;
507 			}
508 			if (c == '\n')
509 				q = 1;
510 			else if (c == a)
511 			{
512 				stakwrite(bp, cp - bp);
513 				bp = cp;
514 				vp = cp + 1;
515 				a = 0;
516 			}
517 			else if ((c == '#' || c == '~') && cp == vp || c == ']' || c != ':' && (c = sh_lexstates[ST_NORM][c]) && c != S_EPAT)
518 				q = 1;
519 		}
520 		if (q & 2)
521 		{
522 			stakputc('$');
523 			stakputc('\'');
524 			cp = bp;
525 			n = fold - 3;
526 			q = 1;
527 			while (c = mbchar(cp))
528 			{
529 				switch (c)
530 				{
531 		    		case ('a'==97?'\033':39):
532 					c = 'E';
533 					break;
534 		    		case '\n':
535 					q = 0;
536 					n = fold - 1;
537 					break;
538 		    		case '\r':
539 					c = 'r';
540 					break;
541 		    		case '\t':
542 					c = 't';
543 					break;
544 		    		case '\f':
545 					c = 'f';
546 					break;
547 		    		case '\b':
548 					c = 'b';
549 					break;
550 		    		case '\a':
551 					c = 'a';
552 					break;
553 		    		case '\\':
554 					if (*cp == 'n')
555 					{
556 						c = '\n';
557 						q = 0;
558 						n = fold - 1;
559 						break;
560 					}
561 				case '\'':
562 					break;
563 		    		default:
564 #if SHOPT_MULTIBYTE
565 					if(!iswprint(c))
566 #else
567 					if(!isprint(c))
568 #endif
569 					{
570 						if ((n -= 4) <= 0)
571 						{
572 							stakwrite("'\\\n$'", 5);
573 							n = fold - 7;
574 						}
575 						sfprintf(staksp, "\\%03o", c);
576 						continue;
577 					}
578 					q = 0;
579 					break;
580 				}
581 				if ((n -= q + 1) <= 0)
582 				{
583 					if (!q)
584 					{
585 						stakputc('\'');
586 						cp = bp;
587 						break;
588 					}
589 					stakwrite("'\\\n$'", 5);
590 					n = fold - 5;
591 				}
592 				if (q)
593 					stakputc('\\');
594 				else
595 					q = 1;
596 				stakputc(c);
597 				bp = cp;
598 			}
599 			if (!c)
600 				stakputc('\'');
601 		}
602 		else if (q & 1)
603 		{
604 			stakputc('\'');
605 			cp = bp;
606 			n = fold ? (fold - 2) : 0;
607 			while (c = mbchar(cp))
608 			{
609 				if (c == '\n')
610 					n = fold - 1;
611 				else if (n && --n <= 0)
612 				{
613 					n = fold - 2;
614 					stakwrite(bp, --cp - bp);
615 					bp = cp;
616 					stakwrite("'\\\n'", 4);
617 				}
618 				else if (n == 1 && *cp == '\'')
619 				{
620 					n = fold - 5;
621 					stakwrite(bp, --cp - bp);
622 					bp = cp;
623 					stakwrite("'\\\n\\''", 6);
624 				}
625 				else if (c == '\'')
626 				{
627 					stakwrite(bp, cp - bp - 1);
628 					bp = cp;
629 					if (n && (n -= 4) <= 0)
630 					{
631 						n = fold - 5;
632 						stakwrite("'\\\n\\''", 6);
633 					}
634 					else
635 						stakwrite("'\\''", 4);
636 				}
637 			}
638 			stakwrite(bp, cp - bp - 1);
639 			stakputc('\'');
640 		}
641 		else if (n = fold)
642 		{
643 			cp = bp;
644 			while (c = mbchar(cp))
645 			{
646 				if (--n <= 0)
647 				{
648 					n = fold;
649 					stakwrite(bp, --cp - bp);
650 					bp = cp;
651 					stakwrite("\\\n", 2);
652 				}
653 			}
654 			stakwrite(bp, cp - bp - 1);
655 		}
656 		else
657 			stakwrite(bp, cp - bp);
658 		if (c)
659 		{
660 			stakputc('\\');
661 			stakputc('\n');
662 		}
663 	} while (c);
664 	stakputc(0);
665 	return(stakptr(offset));
666 }
667 
#if SHOPT_MULTIBYTE
	/*
	 * Search <string> for the first character of <dp>, comparing
	 * whole (possibly multibyte) characters as decoded by mbchar().
	 * Returns the value of cp-string at the point of the match, where
	 * cp has already been advanced by mbchar() over the matching
	 * character -- NOTE(review): this looks like the offset just past
	 * the match rather than of the match; confirm against callers.
	 * If <dp> starts with a null character the same expression is
	 * returned after the whole string has been scanned.
	 * Returns -1 when there is no match.
	 */
	int sh_strchr(const char *string, register const char *dp)
	{
		wchar_t c, d;
		register const char *cp=string;
		mbinit();
		d = mbchar(dp);
		/* reset the conversion state again before decoding <string> */
		mbinit();
		while((c = mbchar(cp)))
		{
			if(c==d)
				return(cp-string);
		}
		if(d==0)
			return(cp-string);
		return(-1);
	}
#endif /* SHOPT_MULTIBYTE */
686 
/*
 * Return the translation of <message> as produced by ERROR_translate().
 * The argument list of ERROR_translate() depends on ERROR_VERSION, so the
 * call is selected at compile time; the two newer forms pass e_dict.
 */
const char *_sh_translate(const char *message)
{
#if ERROR_VERSION >= 20000317L
	return(ERROR_translate(0,0,e_dict,message));
#elif ERROR_VERSION >= 20000101L
	return(ERROR_translate(e_dict,message));
#else
	return(ERROR_translate(message,1));
#endif
}
699 
/*
 * change '['identifier']' to identifier
 * character before <str> must be a '['
 * returns pointer to last character
 *
 * The rewrite is done in place and only when <str> starts with a name
 * that is immediately followed by ']' and, if <last> is non-null, that
 * ']' ends exactly at <last>.  Otherwise nothing is modified and <last>
 * is returned unchanged.
 * When <last> is non-null the text is moved down over the '[' and
 * <last>-2 is returned; whatever follows <last> is not touched.
 * When <last> is null the rest of the string is moved down as well, the
 * string is re-terminated, and a pointer to the position of the old
 * terminating null is returned.
 */
char *sh_checkid(char *str, char *last)
{
	register unsigned char *cp = (unsigned char*)str;
	register unsigned char *v = cp;
	register int c;
	if(c=mbchar(cp),isaletter(c))
		while(c=mbchar(cp),isaname(c));
	/* cp now points just past the character held in c */
	if(c==']' && (!last || ((char*)cp==last)))
	{
		/* eliminate [ and ] */
		/* shift everything up to cp down by one, over the '[' */
		while(v < cp)
		{
			v[-1] = *v;
			v++;
		}
		if(last)
			last -=2;
		else
		{
			/* shift the remainder down by two, over the moved ']' */
			while(*v)
			{
				v[-2] = *v;
				v++;
			}
			v[-2] = 0;
			last = (char*)v;
		}
	}
	return(last);
}
735 
#if	_AST_VERSION  <= 20000317L
/*
 * Fallback fmtident() compiled only when _AST_VERSION is 20000317L or older:
 * returns <string> itself, with the const qualifier cast away.
 */
char *fmtident(const char *string)
{
	return((char*)string);
}
#endif
742