xref: /titanic_44/usr/src/lib/libshell/common/sh/string.c (revision 3c112a2b34403220c06c3e2fcac403358cfba168)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1982-2010 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                  David Korn <dgk@research.att.com>                   *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 /*
22  * string processing routines for Korn shell
23  *
24  */
25 
26 #include	<ast.h>
27 #include	<ast_wchar.h>
28 #include	"defs.h"
29 #include	<stak.h>
30 #include	<ccode.h>
31 #include	"shtable.h"
32 #include	"lexstates.h"
33 #include	"national.h"
34 
/*
 * Without multibyte support mbchar() simply fetches one byte (as an
 * unsigned char) and advances the pointer expression it is given.
 * The argument is not parenthesized, so it must be a plain lvalue.
 */
#if !SHOPT_MULTIBYTE
#define mbchar(p)	(*(unsigned char*)p++)
#endif

/* pull in the wide character classification header when it exists */
#if _hdr_wctype
#   include <wctype.h>
#endif

/*
 * Fallback iswprint(): any value with bits set above the low 8 is
 * treated as printable, otherwise isprint() decides.
 */
#if !_lib_iswprint && !defined(iswprint)
#   define iswprint(c)		(((c)&~0377) || isprint(c))
#endif
46 
47 
48 /*
49  *  Table lookup routine
50  *  <table> is searched for string <sp> and corresponding value is returned
51  *  This is only used for small tables and is used to save non-sharable memory
52  */
53 
54 const Shtable_t *sh_locate(register const char *sp,const Shtable_t *table,int size)
55 {
56 	register int			first;
57 	register const Shtable_t	*tp;
58 	register int			c;
59 	static const Shtable_t		empty = {0,0};
60 	if(sp==0 || (first= *sp)==0)
61 		return(&empty);
62 	tp=table;
63 	while((c= *tp->sh_name) && (CC_NATIVE!=CC_ASCII || c <= first))
64 	{
65 		if(first == c && strcmp(sp,tp->sh_name)==0)
66 			return(tp);
67 		tp = (Shtable_t*)((char*)tp+size);
68 	}
69 	return(&empty);
70 }
71 
/*
 *  shtab_options lookup routine
 *
 *  Looks up the option name <sp> in shtab_options and returns the
 *  sh_number of the matching entry, or 0 when <sp> is null/empty, when
 *  nothing matches, or when an abbreviation matches more than one entry.
 *
 *  Matching rules implemented below:
 *    - a leading "no" on <sp> (unless it is followed by "ti") is removed,
 *      together with one optional separator, and toggles *invert
 *    - a table name that starts with "no" (third character not 't') is
 *      compared without that prefix and toggles *invert when selected
 *    - '-' and '_' are separators and are skipped on both sides
 *    - <sp> may be an abbreviation; each separator delimited word of the
 *      table name may be abbreviated
 *    - <sp> may end in "=value"; a value that strtol() converts to 0
 *      toggles the sense once more
 *
 *  *invert is updated in place (toggled), it is never simply assigned.
 */

/* true for the characters that separate words inside an option name */
#define sep(c)		((c)=='-'||(c)=='_')

int sh_lookopt(register const char *sp, int *invert)
{
	register int			first;
	register const Shtable_t	*tp;
	register int			c;
	/* s/t walk <sp> and the table name, sw/tw mark the current word start */
	register const char		*s, *t, *sw, *tw;
	int				amb;	/* set once an abbreviation is ambiguous */
	int				hit;	/* sh_number of the single partial match */
	int				inv;	/* value of <no> saved with <hit> */
	int				no;	/* current table name carries a "no" prefix */
	if(sp==0)
		return(0);
	/* strip a leading "no"; names continuing with "ti" keep it */
	/* NOTE(review): presumably this protects "notify" - confirm against shtab_options */
	if(*sp=='n' && *(sp+1)=='o' && (*(sp+2)!='t' || *(sp+3)!='i'))
	{
		sp+=2;
		if(sep(*sp))
			sp++;
		*invert = !*invert;
	}
	if((first= *sp)==0)
		return(0);
	tp=shtab_options;
	amb=hit=0;
	for(;;)
	{
		t=tp->sh_name;
		/* compare against the name without its "no" prefix */
		if(no = *t=='n' && *(t+1)=='o' && *(t+2)!='t')
			t+=2;
		/* an empty name terminates the table */
		if(!(c= *t))
			break;
		if(first == c)
		{
			/* exact match wins immediately */
			if(strcmp(sp,t)==0)
			{
				*invert ^= no;
				return(tp->sh_number);
			}
			s=sw=sp;
			tw=t;
			for(;;)
			{
				if(!*s || *s=='=')
				{
					/* end of the user's name, possibly followed by =value */
					if (*s == '=' && !strtol(s+1, NiL, 0))
						no = !no;
					if (!*t)
					{
						/* both names exhausted: full match */
						*invert ^= no;
						return(tp->sh_number);
					}
					/* <sp> is only an abbreviation of this entry */
					if (hit || amb)
					{
						/* second candidate: remember the ambiguity */
						hit = 0;
						amb = 1;
					}
					else
					{
						hit = tp->sh_number;
						inv = no;
					}
					break;
				}
				else if(!*t)
					/* table name ended first: no match */
					break;
				else if(sep(*s))
					sw = ++s;
				else if(sep(*t))
					tw = ++t;
				else if(*s==*t)
				{
					s++;
					t++;
				}
				else if(s==sw && t==tw)
					/* mismatch on the first character of a word */
					break;
				else
				{
					/*
					 * mismatch inside a word: move to the next word
					 * of the table name and back <s> up to a character
					 * that can start it (or to its own word start)
					 */
					if(t!=tw)
					{
						while(*t && !sep(*t))
							t++;
						if(!*t)
							break;
						tw = ++t;
					}
					while (s>sw && *s!=*t)
						s--;
				}
			}
		}
		tp = (Shtable_t*)((char*)tp+sizeof(*shtab_options));
	}
	/* <inv> is only read when <hit> was recorded together with it */
	if(hit)
		*invert ^= inv;
	return(hit);
}
174 
/*
 * Replace the first occurrence of <oldsp> in <string> by <newsp>.
 * The result is built from offset 0 of the stack and returned frozen.
 * A null pointer is returned when <string> is empty or when <oldsp>
 * does not occur in it.  An empty <oldsp> matches at the very start,
 * so <newsp> is then simply put in front of <string>.
 */
char *sh_substitute(const char *string,const char *oldsp,char *newsp)
/*@
	assume string!=NULL && oldsp!=NULL && newsp!=NULL;
	return x satisfying x==NULL ||
		strlen(x)==(strlen(in string)+strlen(in newsp)-strlen(in oldsp));
@*/
{
	register const char *scan = string;
	register const char *pat = oldsp;
	const char *mark = 0;	/* start of the most recent failed match */
	stakseek(0);
	if(!*scan)
		return((char*)0);
	if(*pat)
	{
#if SHOPT_MULTIBYTE
		mbinit();
#endif /* SHOPT_MULTIBYTE */
		for(;;)
		{
			/*
			 * copy text until a byte equal to the first byte of
			 * <oldsp> shows up; the position that just failed to
			 * match (<mark>) is always copied and stepped over
			 */
			while(*scan && (scan==mark || *scan != *pat))
			{
#if SHOPT_MULTIBYTE
				/* advance one whole character at a time */
				int width = mbsize(scan);
				/* a byte for which mbsize() fails is skipped, not copied */
				if(width < 0)
					scan++;
				for(; width > 0; width--)
#endif /* SHOPT_MULTIBYTE */
				stakputc(*scan++);
			}
			if(!*scan)
				return((char*)0);
			mark = scan;
			/* compare the remainder of <oldsp> byte by byte */
			while(*pat && *pat == *scan++)
				pat++;
			if(!*pat)
				break;
			/* no match here: rewind and resume the search */
			scan = mark;
			pat = oldsp;
		}
	}
	/* <scan> now points just past the matched text */
	stakputs(newsp);
	stakputs(scan);
	return(stakfreeze(1));
}
235 
/*
 * TRIM(sp)
 * Strip the escape character from <sp> in place: each backslash is
 * dropped and the byte after it is kept literally.  A backslash that
 * is followed by a null byte (a quoted null) is removed together with
 * that null byte and processing continues after it.
 * A null <sp> is ignored.
 */

void	sh_trim(register char *sp)
/*@
	assume sp!=NULL;
	promise  strlen(in sp) <= in strlen(sp);
@*/
{
	register char *out;
	register int ch;
	if(!sp)
		return;
	for(out = sp; (ch = *sp) != 0; )
	{
#if SHOPT_MULTIBYTE
		int width;
		/* multibyte characters are moved as a unit, never unescaped */
		if(mbwide() && (width=mbsize(sp))>1)
		{
			memmove(out, sp, width);
			out += width;
			sp += width;
			continue;
		}
#endif /* SHOPT_MULTIBYTE */
		sp++;
		/* take the byte behind a backslash, whatever it is */
		if(ch == '\\')
			ch = *sp++;
		if(ch)
			*out++ = ch;
	}
	*out = 0;
}
273 
/*
 * Copy <str1> into <str2>, converting upper case letters to lower case.
 * <str2> needs room for all of <str1> including the terminating null.
 * The two arguments are allowed to be the same buffer.
 */

void sh_utol(register char const *str1,register char *str2)
/*@
	assume str1!=0 && str2!=0
	return x satisfying strlen(in str1)==strlen(in str2);
@*/
{
	register int ch;
	/* bytes are read as unsigned char before being classified */
	while((ch = *((unsigned char*)str1)) != 0)
	{
		*str2 = isupper(ch) ? tolower(ch) : ch;
		str1++;
		str2++;
	}
	*str2 = 0;
}
296 
/*
 * print <str> quoting chars so that it can be read by the shell
 * puts null terminated result on stack, but doesn't freeze it
 *
 * Returns a null pointer for a null <string>.  When no quoting is
 * needed because <string> is a plain name, or a name followed by a
 * bare '=', <string> itself is returned and nothing is put on the stack.
 * Otherwise the return value points into the stack at the offset that
 * was current on entry.
 *
 * For "name=value" the "name=" part is copied unquoted and only the
 * value is examined and quoted.
 *
 * <state> selects the output form:
 *	0	text copied unchanged
 *	1	text enclosed in '...'
 *	2	text written as $'...' with backslash escapes
 */
char	*sh_fmtq(const char *string)
{
	register const char *cp = string, *op;
	register int c, state;
	int offset;
	if(!cp)
		return((char*)0);
	offset = staktell();
	/* an empty string starts in state 1 so that it is emitted as '' */
#if SHOPT_MULTIBYTE
	state = ((c= mbchar(cp))==0);
#else
	state = ((c= *(unsigned char*)cp++)==0);
#endif
	if(isaletter(c))
	{
		/* skip over a leading name */
#if SHOPT_MULTIBYTE
		while((c=mbchar(cp)),isaname(c));
#else
		while((c = *(unsigned char*)cp++),isaname(c));
#endif
		/* the whole string is a name: nothing to quote */
		if(c==0)
			return((char*)string);
		if(c=='=')
		{
			/* "name=" with an empty value needs no quoting either */
			if(*cp==0)
				return((char*)string);
			/* emit "name=" as is and continue with the value */
			c = cp - string;
			stakwrite(string,c);
			string = cp;
#if SHOPT_MULTIBYTE
			c = mbchar(cp);
#else
			c = *(unsigned char*)cp++;
#endif
		}
	}
	/* a leading '#' or '~' (or nothing left) forces at least '...' */
	if(c==0 || c=='#' || c=='~')
		state = 1;
	/* scan the remaining characters to find the strongest quoting needed */
#if SHOPT_MULTIBYTE
	for(;c;c= mbchar(cp))
#else
	for(;c; c= *(unsigned char*)cp++)
#endif
	{
#if SHOPT_MULTIBYTE
		if(c=='\'' || !iswprint(c))
#else
		if(c=='\'' || !isprint(c))
#endif /* SHOPT_MULTIBYTE */
			state = 2;
		/*
		 * ']' and every character with a non-zero ST_NORM lexer state
		 * other than S_EPAT needs quoting; ':' is exempt.
		 * Note that <c> is overwritten by the table value here.
		 */
		else if(c==']' || (c!=':' && c<=0xff && (c=sh_lexstates[ST_NORM][c]) && c!=S_EPAT))
			state |=1;
	}
	if(state<2)
	{
		if(state==1)
			stakputc('\'');
		/* <cp> is one past the terminating null: back up to get the length */
		if(c = --cp - string)
			stakwrite(string,c);
		if(state==1)
			stakputc('\'');
	}
	else
	{
		stakwrite("$'",2);
		cp = string;
		/* <op> remembers where the current character starts */
#if SHOPT_MULTIBYTE
		while(op = cp, c= mbchar(cp))
#else
		while(op = cp, c= *(unsigned char*)cp++)
#endif
		{
			/* state==1: emit backslash followed by <c>; 0: copy verbatim */
			state=1;
			switch(c)
			{
			    /*
			     * escape character: octal 033 when 'a' is 97 (ASCII),
			     * otherwise 39 - presumably the EBCDIC code, confirm
			     */
			    case ('a'==97?'\033':39):
				c = 'E';
				break;
			    case '\n':
				c = 'n';
				break;
			    case '\r':
				c = 'r';
				break;
			    case '\t':
				c = 't';
				break;
			    case '\f':
				c = 'f';
				break;
			    case '\b':
				c = 'b';
				break;
			    case '\a':
				c = 'a';
				break;
			    /* backslash and single quote are escaped by themselves */
			    case '\\':	case '\'':
				break;
			    default:
#if SHOPT_MULTIBYTE
				if(!iswprint(c))
				{
					/* write every byte of the character as \ooo */
					while(op<cp)
						sfprintf(staksp,"\\%.3o",*(unsigned char*)op++);
					continue;
				}
#else
				if(!isprint(c))
				{
					sfprintf(staksp,"\\%.3o",c);
					continue;
				}
#endif
				state=0;
				break;
			}
			if(state)
			{
				stakputc('\\');
				stakputc(c);
			}
			else
				stakwrite(op, cp-op);
		}
		stakputc('\'');
	}
	stakputc(0);
	return(stakptr(offset));
}
430 
/*
 * print <str> quoting chars so that it can be read by the shell
 * puts null terminated result on stack, but doesn't freeze it
 * single!=0 limits quoting to '...'
 * fold>0 prints raw newlines and inserts appropriately
 * escaped newlines every (fold-x) chars
 *
 * <fold> is decremented first and folding is disabled when the result
 * is below 8.  The work is handed to sh_fmtq() when <string> is null or
 * empty, when neither <single> nor folding is in effect, or when
 * folding is in effect but strlen(string) is less than <fold>.
 *
 * Local state:
 *	q	quoting needed for the current piece: bit 0 selects '...',
 *		bit 1 selects $'...'
 *	n	remaining column budget for the current output line
 *	a	'=' while a leading variable name is still being scanned,
 *		0 afterwards
 *	bp	start of the input that has not been written yet
 *	vp	position <cp> has right after reading the first character
 *		of the value; used for the leading '#' / '~' test
 */
char	*sh_fmtqf(const char *string, int single, int fold)
{
	register const char *cp = string;
	register const char *bp;
	register const char *vp;
	register int c;
	register int n;
	register int q;
	register int a;
	int offset;

	if (--fold < 8)
		fold = 0;
	if (!cp || !*cp || !single && !fold || fold && strlen(string) < fold)
		return sh_fmtq(cp);
	offset = staktell();
	/* value given to <q> when a quote or non-printable character is met */
	single = single ? 1 : 3;
	/* note: mbchar() advances <string>; from here on only <cp> is used */
	c = mbchar(string);
	a = isaletter(c) ? '=' : 0;
	vp = cp + 1;
	do
	{
		q = 0;
		n = fold;
		bp = cp;
		/* scan at most <fold> characters (everything when not folding) */
		while ((!n || n-- > 0) && (c = mbchar(cp)))
		{
			if (a && !isaname(c))
				a = 0;
#if SHOPT_MULTIBYTE
			/* values of 0x200 and above are never treated as special */
			if (c >= 0x200)
				continue;
			if (c == '\'' || !iswprint(c))
#else
			if (c == '\'' || !isprint(c))
#endif /* SHOPT_MULTIBYTE */
			{
				q = single;
				break;
			}
			if (c == '\n')
				q = 1;
			else if (c == a)
			{
				/* end of "name=": write it unquoted, value starts here */
				stakwrite(bp, cp - bp);
				bp = cp;
				vp = cp + 1;
				a = 0;
			}
			/* same tests as sh_fmtq(): leading '#'/'~', ']' and lexer specials */
			else if ((c == '#' || c == '~') && cp == vp || c == ']' || c != ':' && (c = sh_lexstates[ST_NORM][c]) && c != S_EPAT)
				q = 1;
		}
		if (q & 2)
		{
			/* $'...' form; inside the loop <q> means "prefix with backslash" */
			stakputc('$');
			stakputc('\'');
			cp = bp;
			n = fold - 3;
			q = 1;
			while (c = mbchar(cp))
			{
				switch (c)
				{
		    		/* escape character, see the same case in sh_fmtq() */
		    		case ('a'==97?'\033':39):
					c = 'E';
					break;
		    		case '\n':
					/* raw newline is written as is and starts a new line */
					q = 0;
					n = fold - 1;
					break;
		    		case '\r':
					c = 'r';
					break;
		    		case '\t':
					c = 't';
					break;
		    		case '\f':
					c = 'f';
					break;
		    		case '\b':
					c = 'b';
					break;
		    		case '\a':
					c = 'a';
					break;
		    		case '\\':
					/*
					 * a backslash in front of 'n' is written as a raw
					 * newline; <cp> is not advanced, so the 'n' itself
					 * is handled by the next iteration
					 */
					if (*cp == 'n')
					{
						c = '\n';
						q = 0;
						n = fold - 1;
						break;
					}
					/* FALLTHROUGH */
				case '\'':
					break;
		    		default:
#if SHOPT_MULTIBYTE
					if(!iswprint(c))
#else
					if(!isprint(c))
#endif
					{
						/* octal escape needs 4 columns; continue on a new line if short */
						if ((n -= 4) <= 0)
						{
							stakwrite("'\\\n$'", 5);
							n = fold - 7;
						}
						sfprintf(staksp, "\\%03o", c);
						continue;
					}
					q = 0;
					break;
				}
				/* charge one column, two when a backslash is emitted */
				if ((n -= q + 1) <= 0)
				{
					if (!q)
					{
						/* close the quote and let the outer loop restart at <bp> */
						stakputc('\'');
						cp = bp;
						break;
					}
					stakwrite("'\\\n$'", 5);
					n = fold - 5;
				}
				if (q)
					stakputc('\\');
				else
					q = 1;
				stakputc(c);
				bp = cp;
			}
			if (!c)
				stakputc('\'');
		}
		else if (q & 1)
		{
			/* '...' form: embedded quotes become '\'' */
			stakputc('\'');
			cp = bp;
			n = fold ? (fold - 2) : 0;
			while (c = mbchar(cp))
			{
				if (c == '\n')
					n = fold - 1;
				else if (n && --n <= 0)
				{
					/* line is full: close quote, escaped newline, reopen */
					n = fold - 2;
					stakwrite(bp, --cp - bp);
					bp = cp;
					stakwrite("'\\\n'", 4);
				}
				else if (n == 1 && *cp == '\'')
				{
					/* one column left and a quote is next: break the line before it */
					n = fold - 5;
					stakwrite(bp, --cp - bp);
					bp = cp;
					stakwrite("'\\\n\\''", 6);
				}
				else if (c == '\'')
				{
					/* flush the text before the quote, then emit the quote */
					stakwrite(bp, cp - bp - 1);
					bp = cp;
					if (n && (n -= 4) <= 0)
					{
						n = fold - 5;
						stakwrite("'\\\n\\''", 6);
					}
					else
						stakwrite("'\\''", 4);
				}
			}
			/* <cp> is one past the terminating null here */
			stakwrite(bp, cp - bp - 1);
			stakputc('\'');
		}
		else if (n = fold)
		{
			/* no quoting needed, only folding with escaped newlines */
			cp = bp;
			while (c = mbchar(cp))
			{
				if (--n <= 0)
				{
					n = fold;
					stakwrite(bp, --cp - bp);
					bp = cp;
					stakwrite("\\\n", 2);
				}
			}
			stakwrite(bp, cp - bp - 1);
		}
		else
			stakwrite(bp, cp - bp);
		/* input remains: continue on a new line */
		if (c)
		{
			stakputc('\\');
			stakputc('\n');
		}
	} while (c);
	stakputc(0);
	return(stakptr(offset));
}
637 
#if SHOPT_MULTIBYTE
	/*
	 * Multibyte aware character search.
	 * The first character of <dp> is looked for in <string>.
	 * On a match the value of (scan position - string) is returned,
	 * taken after mbchar() has consumed the matching character
	 * (presumably this is the offset just past it - confirm against
	 * the mbchar() definition).  When the wanted character is the
	 * null character the same difference is returned once the end of
	 * <string> has been read.  Otherwise -1 is returned.
	 */
	int sh_strchr(const char *string, register const char *dp)
	{
		register const char *scan = string;
		wchar_t wanted, got;
		mbinit();
		wanted = mbchar(dp);
		mbinit();
		for(;;)
		{
			got = mbchar(scan);
			if(!got)
				break;
			if(got==wanted)
				return(scan-string);
		}
		if(wanted!=0)
			return(-1);
		return(scan-string);
	}
#endif /* SHOPT_MULTIBYTE */
656 
/*
 * Hand <message> to ERROR_translate() using the argument list that
 * matches the ERROR_VERSION this file is compiled against.
 */
const char *_sh_translate(const char *message)
{
#if ERROR_VERSION >= 20000317L
	return(ERROR_translate(0,0,e_dict,message));
#elif ERROR_VERSION >= 20000101L
	return(ERROR_translate(e_dict,message));
#else
	return(ERROR_translate(message,1));
#endif
}
669 
/*
 * change '['identifier']' to identifier
 * character before <str> must be a '['
 *
 * <str> points just past the '['.  Nothing is changed, and <last> is
 * returned as given, unless the text at <str> is an identifier followed
 * by ']' and, when <last> is not null, that ']' is the character right
 * before <last>.
 * Otherwise the identifier is moved one position to the left, over the
 * '['.  With a non-null <last> the value <last>-2 is returned and the
 * text behind the ']' is left alone.  With a null <last> the rest of
 * the string is moved two positions to the left, the string is
 * re-terminated, and a pointer to the old terminator position is
 * returned.
 */
char *sh_checkid(char *str, char *last)
{
	register unsigned char *scan = (unsigned char*)str;
	register unsigned char *mv = scan;
	register int c;
	c = *scan++;
	if(isaletter(c))
	{
		/* run over the rest of the identifier */
		do
			c = *scan++;
		while(isaname(c));
	}
	/* <scan> is now one past the character that ended the scan */
	if(c!=']' || (last && (char*)scan!=last))
		return(last);
	/* slide identifier and ']' one place down, overwriting the '[' */
	for(; mv < scan; mv++)
		mv[-1] = *mv;
	if(last)
		return(last-2);
	/* slide the tail two places down, overwriting the copied ']' */
	for(; *mv; mv++)
		mv[-2] = *mv;
	mv[-2] = 0;
	return((char*)mv);
}
705 
#if	_AST_VERSION  <= 20000317L
/*
 * Stand-in used with older AST versions:
 * hands back <string> unchanged, with the const qualifier removed.
 */
char *fmtident(const char *string)
{
	char *unchanged = (char*)string;
	return(unchanged);
}
#endif
712