xref: /titanic_50/usr/src/lib/libshell/common/sh/string.c (revision 9f758caf94c37c3ad28c48cfe503f9fc830a66d5)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1982-2008 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                  David Korn <dgk@research.att.com>                   *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 /*
22  * string processing routines for Korn shell
23  *
24  */
25 
26 #include	<ast.h>
27 #include	<ast_wchar.h>
28 #include	"defs.h"
29 #include	<stak.h>
30 #include	<ctype.h>
31 #include	<ccode.h>
32 #include	"shtable.h"
33 #include	"lexstates.h"
34 #include	"national.h"
35 
#if !SHOPT_MULTIBYTE
/*
 * Single-byte stand-in for mbchar(): yields the byte at <p> as an
 * unsigned value and advances <p> by one.
 * <p> must be a modifiable pointer lvalue.  It is parenthesized so that
 * an lvalue expression such as *pp advances the pointer that was read
 * (without the parentheses, mbchar(*pp) would expand to *pp++ and
 * increment pp instead).
 */
#define mbchar(p)	(*(unsigned char*)(p)++)
#endif
39 
40 #if _hdr_wctype
41 #   include <wctype.h>
42 #endif
43 
#if !_lib_iswprint && !defined(iswprint)
/*
 * Fallback used when neither a library iswprint() nor an iswprint macro
 * is available: a value with any bit set above the low eight is taken
 * to be printable, otherwise the decision is left to isprint().
 */
#   define iswprint(c)		(((c)&~0377) || isprint(c))
#endif
47 
48 
49 /*
50  *  Table lookup routine
51  *  <table> is searched for string <sp> and corresponding value is returned
52  *  This is only used for small tables and is used to save non-sharable memory
53  */
54 
55 const Shtable_t *sh_locate(register const char *sp,const Shtable_t *table,int size)
56 {
57 	register int			first;
58 	register const Shtable_t	*tp;
59 	register int			c;
60 	static const Shtable_t		empty = {0,0};
61 	if(sp==0 || (first= *sp)==0)
62 		return(&empty);
63 	tp=table;
64 	while((c= *tp->sh_name) && (CC_NATIVE!=CC_ASCII || c <= first))
65 	{
66 		if(first == c && strcmp(sp,tp->sh_name)==0)
67 			return(tp);
68 		tp = (Shtable_t*)((char*)tp+size);
69 	}
70 	return(&empty);
71 }
72 
/*
 *  shtab_options lookup routine
 *
 *  Looks up the option name <sp> in shtab_options and returns the
 *  sh_number of the matching entry, or 0 when <sp> is null or empty,
 *  when nothing matches, or when an abbreviation matches more than one
 *  entry.
 *  <*invert> is toggled for a leading "no" on <sp>, for a leading "no"
 *  on the table name, and for a trailing "=value" whose value converts
 *  to 0 with strtol().
 *  Besides exact names, abbreviations are accepted: '-' and '_' are
 *  treated as word separators that may be omitted, and each word of the
 *  table name may be shortened to its leading characters.
 */

/* true for the characters that separate the words of an option name */
#define sep(c)		((c)=='-'||(c)=='_')

int sh_lookopt(register const char *sp, int *invert)
{
	register int			first;
	register const Shtable_t	*tp;
	register int			c;
	register const char		*s, *t, *sw, *tw;
	int				amb;	/* set once an abbreviation matched two entries */
	int				hit;	/* sh_number of the only abbreviation match so far */
	int				inv;	/* inversion that belongs to <hit> */
	int				no;	/* inversion for the entry being examined */
	if(sp==0)
		return(0);
	/*
	 * strip a leading "no" unless the name continues with "ti"
	 * (presumably so that a name such as "notify" is left intact)
	 */
	if(*sp=='n' && *(sp+1)=='o' && (*(sp+2)!='t' || *(sp+3)!='i'))
	{
		sp+=2;
		if(sep(*sp))
			sp++;
		*invert = !*invert;
	}
	if((first= *sp)==0)
		return(0);
	tp=shtab_options;
	amb=hit=0;
	for(;;)
	{
		t=tp->sh_name;
		/* table names stored with a "no" prefix are compared without it */
		if(no = *t=='n' && *(t+1)=='o' && *(t+2)!='t')
			t+=2;
		/* an empty name ends the table */
		if(!(c= *t))
			break;
		if(first == c)
		{
			/* exact match wins immediately */
			if(strcmp(sp,t)==0)
			{
				*invert ^= no;
				return(tp->sh_number);
			}
			/*
			 * abbreviation match: s walks the argument, t walks the
			 * table name; sw and tw remember where the current word
			 * of each one starts
			 */
			s=sw=sp;
			tw=t;
			for(;;)
			{
				if(!*s || *s=='=')
				{
					/* argument consumed; "=0" style value flips the sense */
					if (*s == '=' && !strtol(s+1, NiL, 0))
						no = !no;
					/* table name consumed as well: full match */
					if (!*t)
					{
						*invert ^= no;
						return(tp->sh_number);
					}
					/* partial match: keep it only while it is unique */
					if (hit || amb)
					{
						hit = 0;
						amb = 1;
					}
					else
					{
						hit = tp->sh_number;
						inv = no;
					}
					break;
				}
				else if(!*t)
					break;	/* argument is longer than the table name */
				else if(sep(*s))
					sw = ++s;	/* new word in the argument */
				else if(sep(*t))
					tw = ++t;	/* new word in the table name */
				else if(*s==*t)
				{
					s++;
					t++;
				}
				else if(s==sw && t==tw)
					break;	/* words differ in their first character */
				else
				{
					/*
					 * mismatch inside a word: move on to the next word
					 * of the table name, then back the argument up
					 * within its current word until it lines up
					 */
					if(t!=tw)
					{
						while(*t && !sep(*t))
							t++;
						if(!*t)
							break;
						tw = ++t;
					}
					while (s>sw && *s!=*t)
						s--;
				}
			}
		}
		tp = (Shtable_t*)((char*)tp+sizeof(*shtab_options));
	}
	/* inv is only read here when hit was set, which also set inv */
	if(hit)
		*invert ^= inv;
	return(hit);
}
175 
/*
 * look for the first occurrence of the substring <oldsp> in <string> and
 * replace it with <newsp>
 * The stack is reset with stakseek(0), the new string is built on top of
 * it and the frozen result is returned.
 * Returns a null pointer when <string> is empty or does not contain
 * <oldsp>.  An empty <oldsp> matches at the start, so the result is
 * <newsp> followed by <string>.
 */
char *sh_substitute(const char *string,const char *oldsp,char *newsp)
/*@
	assume string!=NULL && oldsp!=NULL && newsp!=NULL;
	return x satisfying x==NULL ||
		strlen(x)==(strlen(in string)+strlen(in newsp)-strlen(in oldsp));
@*/
{
	register const char *sp = string;
	register const char *cp;
	const char *savesp = 0;	/* position of the last failed match attempt */
	stakseek(0);
	if(*sp==0)
		return((char*)0);
	if(*(cp=oldsp) == 0)
		goto found;
#if SHOPT_MULTIBYTE
	mbinit();
#endif /* SHOPT_MULTIBYTE */
	do
	{
		/*
		 * copy up to the first character which matches the start of
		 * oldsp; the position of a failed attempt (savesp) is always
		 * copied so that the scan makes progress
		 */
		while(*sp && (savesp==sp || *sp != *cp))
		{
#if SHOPT_MULTIBYTE
			/* copy a whole character at a time */
			int c = mbsize(sp);
			/*
			 * an invalid sequence is copied as a single byte rather
			 * than being dropped from the result
			 */
			if(c < 1)
				c = 1;
			while(c-- > 0)
#endif /* SHOPT_MULTIBYTE */
			stakputc(*sp++);
		}
		if(*sp == 0)
			return((char*)0);
		savesp = sp;
		/* compare the rest of oldsp against the string */
		for(;*cp;cp++)
		{
			if(*cp != *sp++)
				break;
		}
		if(*cp==0)
			/* match found, sp is just past the matched text */
			goto found;
		/* no match here: rewind and resume the scan */
		sp = savesp;
		cp = oldsp;
	}
	while(*sp);
	return((char*)0);

found:
	/* copy new */
	stakputs(newsp);
	/* copy rest of string */
	stakputs(sp);
	return(stakfreeze(1));
}
236 
/*
 * TRIM(sp)
 * Strip the escaping backslashes from <sp> in place: each backslash is
 * dropped and the character after it is kept as is.  A backslash that is
 * followed by a NUL (a quoted null) is dropped together with that NUL and
 * the scan carries on with the next byte.
 * A null <sp> is ignored.
 * NOTE(review): because of the quoted null rule, a backslash directly in
 * front of the terminating NUL makes the scan continue beyond that NUL;
 * callers presumably never pass such a string -- confirm.
 */

void	sh_trim(register char *sp)
/*@
	assume sp!=NULL;
	promise  strlen(in sp) <= in strlen(sp);
@*/
{
	register char *src = sp;	/* read position */
	register char *dst = sp;	/* write position, never ahead of src */
	register int ch;

	if(!sp)
		return;
	for(;;)
	{
		ch = *src;
		if(!ch)
			break;
#if SHOPT_MULTIBYTE
		if(mbwide())
		{
			/* multibyte characters are moved as a unit, unexamined */
			int width = mbsize(src);
			if(width > 1)
			{
				memmove(dst, src, width);
				dst += width;
				src += width;
				continue;
			}
		}
#endif /* SHOPT_MULTIBYTE */
		src++;
		/* an escape is replaced by the character it protects */
		if(ch == '\\')
			ch = *src++;
		/* a quoted null leaves nothing behind */
		if(ch)
			*dst++ = ch;
	}
	*dst = 0;
}
274 
/*
 * Lower-case copy: every byte of <str1> is stored into <str2>, upper
 * case letters being replaced by their lower case form, and <str2> is
 * then NUL terminated.
 * <str2> must have room for strlen(<str1>)+1 bytes.
 * <str1> and <str2> may be the same buffer.
 */

void sh_utol(register char const *str1,register char *str2)
/*@
	assume str1!=0 && str2!=0
	return x satisfying strlen(in str1)==strlen(in str2);
@*/
{
	/* bytes are read as unsigned so they are valid <ctype.h> arguments */
	register const unsigned char *in = (const unsigned char*)str1;
	register int ch;

	while((ch = *in++) != 0)
		*str2++ = isupper(ch) ? tolower(ch) : ch;
	*str2 = 0;
}
297 
/*
 * print <str> quoting chars so that it can be read by the shell
 * puts null terminated result on stack, but doesn't freeze it
 *
 * Returns a null pointer for a null <string>.  Returns <string> itself,
 * with nothing put on the stack, when it consists only of a name or of
 * a name followed by a single '='.  Otherwise returns the address of the
 * text that was appended to the stack.
 * A leading "name=" is copied unquoted and only the value is quoted.
 * Three output forms are used: the text as is, the text inside '...',
 * or, when it holds a single quote or a non-printable character, the
 * text inside $'...' with backslash escapes.
 */
char	*sh_fmtq(const char *string)
{
	register const char *cp = string, *op;
	/* state: 0 no quoting, bit 0 set '...' is needed, 2 or more $'...' is needed */
	register int c, state;
	int offset;
	if(!cp)
		return((char*)0);
	offset = staktell();
	/* an empty string has to be quoted */
#if SHOPT_MULTIBYTE
	state = ((c= mbchar(cp))==0);
#else
	state = ((c= *(unsigned char*)cp++)==0);
#endif
	if(isaletter(c))
	{
		/* skip over a leading name */
#if SHOPT_MULTIBYTE
		while((c=mbchar(cp)),isaname(c));
#else
		while((c = *(unsigned char*)cp++),isaname(c));
#endif
		/* nothing but a name: no quoting required */
		if(c==0)
			return((char*)string);
		if(c=='=')
		{
			/* "name=" with no value: no quoting required */
			if(*cp==0)
				return((char*)string);
			/* emit "name=" unquoted and continue with the value */
			c = cp - string;
			stakwrite(string,c);
			string = cp;
#if SHOPT_MULTIBYTE
			c = mbchar(cp);
#else
			c = *(unsigned char*)cp++;
#endif
		}
	}
	/* an empty value, or one that starts with '#' or '~', must be quoted */
	if(c==0 || c=='#' || c=='~')
		state = 1;
	/* classify the remaining characters */
#if SHOPT_MULTIBYTE
	for(;c;c= mbchar(cp))
#else
	for(;c; c= *(unsigned char*)cp++)
#endif
	{
#if SHOPT_MULTIBYTE
		if(c=='\'' || !iswprint(c))
#else
		if(c=='\'' || !isprint(c))
#endif /* SHOPT_MULTIBYTE */
			state = 2;
		/* ']' and characters with a non-zero ST_NORM state other than S_EPAT need quotes; ':' is exempt */
		else if(c==']' || (c!=':' && c<=0xff && (c=sh_lexstates[ST_NORM][c]) && c!=S_EPAT))
			state |=1;
	}
	if(state<2)
	{
		if(state==1)
			stakputc('\'');
		/* the scan left cp one past the terminator; step back to get the length */
		if(c = --cp - string)
			stakwrite(string,c);
		if(state==1)
			stakputc('\'');
	}
	else
	{
		stakwrite("$'",2);
		cp = string;
		/* op marks the first byte of the character held in c */
#if SHOPT_MULTIBYTE
		while(op = cp, c= mbchar(cp))
#else
		while(op = cp, c= *(unsigned char*)cp++)
#endif
		{
			/* within this loop state!=0 means: emit c preceded by a backslash */
			state=1;
			switch(c)
			{
			    /* first value when 'a' has its ASCII code, 39 otherwise; written as \E */
			    case ('a'==97?'\033':39):
				c = 'E';
				break;
			    case '\n':
				c = 'n';
				break;
			    case '\r':
				c = 'r';
				break;
			    case '\t':
				c = 't';
				break;
			    case '\f':
				c = 'f';
				break;
			    case '\b':
				c = 'b';
				break;
			    case '\a':
				c = 'a';
				break;
			    /* backslash and single quote are escaped as themselves */
			    case '\\':	case '\'':
				break;
			    default:
#if SHOPT_MULTIBYTE
				if(!iswprint(c))
				{
					/* one three digit octal escape per byte of the character */
					while(op<cp)
						sfprintf(staksp,"\\%.3o",*(unsigned char*)op++);
					continue;
				}
#else
				if(!isprint(c))
				{
					sfprintf(staksp,"\\%.3o",c);
					continue;
				}
#endif
				/* printable: copied unchanged below */
				state=0;
				break;
			}
			if(state)
			{
				stakputc('\\');
				stakputc(c);
			}
			else
				stakwrite(op, cp-op);
		}
		stakputc('\'');
	}
	stakputc(0);
	return(stakptr(offset));
}
431 
/*
 * print <str> quoting chars so that it can be read by the shell
 * puts null terminated result on stack, but doesn't freeze it
 * single!=0 limits quoting to '...'
 * fold>0 prints raw newlines and inserts appropriately
 * escaped newlines every (fold-x) chars
 *
 * The work is handed to sh_fmtq() when <string> is null or empty, when
 * neither <single> nor folding is requested, or when folding is
 * requested and <string> is shorter than the fold width.
 * A fold width that is below 8 after being decremented disables folding.
 * Returns the address of the text appended to the stack.
 */
char	*sh_fmtqf(const char *string, int single, int fold)
{
	register const char *cp = string;	/* scan position */
	register const char *bp;		/* start of the text not yet written */
	register const char *vp;		/* scan position at which '#' and '~' need quoting */
	register int c;
	register int n;				/* characters left on the current line */
	register int q;				/* quoting needed for the current segment */
	register int a;				/* '=' while a leading name is being scanned, else 0 */
	int offset;

	if (--fold < 8)
		fold = 0;
	if (!cp || !*cp || !single && !fold || fold && strlen(string) < fold)
		return sh_fmtq(cp);
	offset = staktell();
	/* value given to q for a quote or non-printable: bit 1 selects $'...' */
	single = single ? 1 : 3;
	c = mbchar(string);
	a = isaletter(c) ? '=' : 0;
	vp = cp + 1;
	/* each pass of this loop scans and emits one segment */
	do
	{
		q = 0;
		n = fold;
		bp = cp;
		/* scan at most <fold> characters (no limit when folding is off) to choose the quoting */
		while ((!n || n-- > 0) && (c = mbchar(cp)))
		{
			if (a && !isaname(c))
				a = 0;
#if SHOPT_MULTIBYTE
			/* NOTE(review): sh_fmtq() limits the table lookup below to c<=0xff; confirm that 0x200 is the intended bound here */
			if (c >= 0x200)
				continue;
			if (c == '\'' || !iswprint(c))
#else
			if (c == '\'' || !isprint(c))
#endif /* SHOPT_MULTIBYTE */
			{
				q = single;
				break;
			}
			if (c == '\n')
				q = 1;
			/*
			 * NOTE(review): '=' is presumably not a name character, in
			 * which case <a> was cleared just above and this branch
			 * cannot be taken -- confirm
			 */
			else if (c == a)
			{
				stakwrite(bp, cp - bp);
				bp = cp;
				vp = cp + 1;
				a = 0;
			}
			else if ((c == '#' || c == '~') && cp == vp || c == ']' || c != ':' && (c = sh_lexstates[ST_NORM][c]) && c != S_EPAT)
				q = 1;
		}
		if (q & 2)
		{
			/* $'...' form; from here on q!=0 means: emit c preceded by a backslash */
			stakputc('$');
			stakputc('\'');
			cp = bp;
			n = fold - 3;
			q = 1;
			while (c = mbchar(cp))
			{
				switch (c)
				{
				/* first value when 'a' has its ASCII code, 39 otherwise; written as \E */
		    		case ('a'==97?'\033':39):
					c = 'E';
					break;
				/* a newline is written raw and starts a new output line */
		    		case '\n':
					q = 0;
					n = fold - 1;
					break;
		    		case '\r':
					c = 'r';
					break;
		    		case '\t':
					c = 't';
					break;
		    		case '\f':
					c = 'f';
					break;
		    		case '\b':
					c = 'b';
					break;
		    		case '\a':
					c = 'a';
					break;
		    		case '\\':
					/* a backslash followed by 'n' is written as a raw newline; cp is not moved past the 'n' */
					if (*cp == 'n')
					{
						c = '\n';
						q = 0;
						n = fold - 1;
						break;
					}
					/* FALLTHROUGH: any other backslash is escaped like a quote */
				case '\'':
					break;
		    		default:
#if SHOPT_MULTIBYTE
					if(!iswprint(c))
#else
					if(!isprint(c))
#endif
					{
						/* an octal escape takes four columns; break the line first when they do not fit */
						if ((n -= 4) <= 0)
						{
							stakwrite("'\\\n$'", 5);
							n = fold - 7;
						}
						sfprintf(staksp, "\\%03o", c);
						continue;
					}
					q = 0;
					break;
				}
				/* account for c and, when escaped, its backslash */
				if ((n -= q + 1) <= 0)
				{
					if (!q)
					{
						/* close the quote and let the outer loop restart at bp */
						stakputc('\'');
						cp = bp;
						break;
					}
					stakwrite("'\\\n$'", 5);
					n = fold - 5;
				}
				if (q)
					stakputc('\\');
				else
					q = 1;
				stakputc(c);
				bp = cp;
			}
			/* the whole string was consumed: close the quote */
			if (!c)
				stakputc('\'');
		}
		else if (q & 1)
		{
			/* '...' form; an embedded single quote is written as '\'' */
			stakputc('\'');
			cp = bp;
			n = fold ? (fold - 2) : 0;
			while (c = mbchar(cp))
			{
				if (c == '\n')
					n = fold - 1;
				else if (n && --n <= 0)
				{
					/* line is full: flush, then close the quote, continue the line and reopen */
					n = fold - 2;
					stakwrite(bp, --cp - bp);
					bp = cp;
					stakwrite("'\\\n'", 4);
				}
				else if (n == 1 && *cp == '\'')
				{
					/* a quote comes next and would not fit: fold in front of it */
					n = fold - 5;
					stakwrite(bp, --cp - bp);
					bp = cp;
					stakwrite("'\\\n\\''", 6);
				}
				else if (c == '\'')
				{
					/* flush the text before the quote, then write the quote escaped */
					stakwrite(bp, cp - bp - 1);
					bp = cp;
					if (n && (n -= 4) <= 0)
					{
						n = fold - 5;
						stakwrite("'\\\n\\''", 6);
					}
					else
						stakwrite("'\\''", 4);
				}
			}
			/* cp is one past the terminator here, hence the -1 */
			stakwrite(bp, cp - bp - 1);
			stakputc('\'');
		}
		else if (n = fold)
		{
			/* no quoting needed: copy the text with a backslash-newline every <fold> characters */
			cp = bp;
			while (c = mbchar(cp))
			{
				if (--n <= 0)
				{
					n = fold;
					stakwrite(bp, --cp - bp);
					bp = cp;
					stakwrite("\\\n", 2);
				}
			}
			stakwrite(bp, cp - bp - 1);
		}
		else
			stakwrite(bp, cp - bp);
		/* more input remains: continue the line and process the next segment */
		if (c)
		{
			stakputc('\\');
			stakputc('\n');
		}
	} while (c);
	stakputc(0);
	return(stakptr(offset));
}
638 
#if SHOPT_MULTIBYTE
	/*
	 * Multibyte character search: the first character of <dp> is looked
	 * for in <string>.
	 * Returns scan position minus <string> as taken right after the
	 * matching character was read with mbchar() (mbchar() is expected
	 * to advance its argument), or, when the character looked for is
	 * the NUL, the same difference taken after the terminator was read.
	 * Returns -1 when the character does not occur.
	 */
	int sh_strchr(const char *string, register const char *dp)
	{
		register const char *scan = string;
		wchar_t wanted, seen;

		mbinit();
		wanted = mbchar(dp);
		mbinit();
		for(;;)
		{
			seen = mbchar(scan);
			if(!seen)
				break;
			if(seen==wanted)
				return(scan-string);
		}
		/* only the terminator is left to match */
		if(wanted!=0)
			return(-1);
		return(scan-string);
	}
#endif /* SHOPT_MULTIBYTE */
657 
/*
 * Pass <message> to ERROR_translate() and return its result.
 * The argument list handed to ERROR_translate() is selected at compile
 * time from ERROR_VERSION.
 */
const char *_sh_translate(const char *message)
{
#if ERROR_VERSION >= 20000317L
	return(ERROR_translate(0,0,e_dict,message));
#elif ERROR_VERSION >= 20000101L
	return(ERROR_translate(e_dict,message));
#else
	return(ERROR_translate(message,1));
#endif
}
670 
/*
 * Rewrite '['identifier']' as identifier, in place.
 * <str> points just past the '[', so the byte before <str> is
 * overwritten.
 * Nothing is changed, and <last> is returned as given, unless the name
 * at <str> is directly followed by ']' and, when <last> is not null,
 * that ']' is the byte just before <last>.
 * On a rewrite with <last> not null, only the name and the ']' are moved
 * down one byte and <last>-2 is returned.
 * On a rewrite with <last> null, the remainder of the string is moved
 * down two bytes as well, the string is terminated again and the
 * address of the old terminator is returned.
 */
char *sh_checkid(char *str, char *last)
{
	register unsigned char *scan = (unsigned char*)str;
	register unsigned char *mv = scan;
	register int ch;

	/* step over a name, if there is one; ch ends up as the byte after it */
	ch = *scan++;
	if(isaletter(ch))
	{
		do
			ch = *scan++;
		while(isaname(ch));
	}
	if(ch!=']' || (last && (char*)scan!=last))
		return(last);
	/* eliminate [ : name and ] slide down over it */
	for(; mv < scan; mv++)
		mv[-1] = *mv;
	if(last)
		return(last-2);
	/* eliminate ] : the tail slides down over the two freed bytes */
	for(; *mv; mv++)
		mv[-2] = *mv;
	mv[-2] = 0;
	return((char*)mv);
}
706 
#if	_AST_VERSION  <= 20000317L
/*
 * fmtident() as compiled in when _AST_VERSION is not above 20000317L:
 * hands back <string> itself with the const qualifier cast away.
 */
char *fmtident(const char *string)
{
	char *same = (char*)string;

	return(same);
}
#endif
713